Description:
- add isolate
- more comment and output for each script
Commit status:
[Not Reviewed]
References:
Diff options:
Comments:
0 Commit comments
0 Inline Comments
Unresolved TODOs:
There are no unresolved TODOs
r256:78af34fd4a2e - - 21 files changed: 3167 inserted, 31 deleted
@@ -0,0 +1,17 | |||||
|
|
# Travis CI configuration: build Isolate with gcc and install it into a
# staging directory.
language: c

compiler: gcc

# Packages installed via apt before the build (libcap headers plus the
# AsciiDoc/DocBook toolchain).
addons:
  apt:
    packages:
      - asciidoc
      - libcap-dev
      - libxml2-utils
      - xsltproc
      - docbook-xml
      - docbook-xsl

# Build everything, then install under /tmp/isolate instead of the real root.
script:
  - make DESTDIR=/tmp/isolate
  - make DESTDIR=/tmp/isolate install
@@ -0,0 +1,12 | |||||
|
|
1 | + Isolate is free software: you can redistribute it and/or modify | ||
|
|
2 | + it under the terms of the GNU General Public License as published by | ||
|
|
3 | + the Free Software Foundation, either version 2 of the License, or | ||
|
|
4 | + (at your option) any later version. | ||
|
|
5 | + | ||
|
|
6 | + This program is distributed in the hope that it will be useful, | ||
|
|
7 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|
|
8 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
|
|
9 | + GNU General Public License for more details. | ||
|
|
10 | + | ||
|
|
11 | + If you have less than 10 copies of the GPL on your system :-), | ||
|
|
12 | + you can find it at http://www.gnu.org/licenses/. |
@@ -0,0 +1,67 | |||||
|
|
# Makefile for Isolate
# (c) 2015--2018 Martin Mares <mj@ucw.cz>
# (c) 2017 Bernard Blackham <bernard@blackham.com.au>

all: isolate isolate.1 isolate.1.html isolate-check-environment

CC=gcc
CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -Wstrict-prototypes -Wmissing-prototypes -D_GNU_SOURCE
LIBS=-lcap

VERSION=1.5
YEAR=2018
BUILD_DATE:=$(shell date '+%Y-%m-%d')
BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describe --always --tags ; else echo '<unknown>' ; fi)

# Run-time locations. CONFIG is compiled into the binary as CONFIG_FILE, so
# these paths must describe the final system layout and must NOT contain
# DESTDIR. DESTDIR is a staging prefix applied only by the install recipes.
PREFIX = /usr/local
VARPREFIX = /var/local
CONFIGDIR = $(PREFIX)/etc
CONFIG = $(CONFIGDIR)/isolate
BINDIR = $(PREFIX)/bin
DATAROOTDIR = $(PREFIX)/share
DATADIR = $(DATAROOTDIR)
MANDIR = $(DATADIR)/man
MAN1DIR = $(MANDIR)/man1
BOXDIR = $(VARPREFIX)/lib/isolate

isolate: isolate.o util.o rules.o cg.o config.o
	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)

%.o: %.c isolate.h config.h
	$(CC) $(CFLAGS) -c -o $@ $<

# Target-specific defines: version/build information and the config file path.
isolate.o: CFLAGS += -DVERSION='"$(VERSION)"' -DYEAR='"$(YEAR)"' -DBUILD_DATE='"$(BUILD_DATE)"' -DBUILD_COMMIT='"$(BUILD_COMMIT)"'
config.o: CFLAGS += -DCONFIG_FILE='"$(CONFIG)"'

isolate.1: isolate.1.txt
	a2x -f manpage $<

# The dependency on isolate.1 is there to serialize both calls of asciidoc,
# which does not name temporary files safely.
isolate.1.html: isolate.1.txt isolate.1
	a2x -f xhtml -D . $<

clean:
	rm -f *.o
	rm -f isolate isolate.1 isolate.1.html
	rm -f docbook-xsl.css

# The isolate binary is installed set-uid (mode 4755).
install: isolate isolate-check-environment
	install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(BOXDIR) $(DESTDIR)$(CONFIGDIR)
	install isolate-check-environment $(DESTDIR)$(BINDIR)
	install -m 4755 isolate $(DESTDIR)$(BINDIR)
	install -m 644 default.cf $(DESTDIR)$(CONFIG)

install-doc: isolate.1
	install -d $(DESTDIR)$(MAN1DIR)
	install -m 644 $< $(DESTDIR)$(MAN1DIR)/$<

release: isolate.1.html
	git tag v$(VERSION)
	git push --tags
	git archive --format=tar --prefix=isolate-$(VERSION)/ HEAD | gzip >isolate-$(VERSION).tar.gz
	rsync isolate-$(VERSION).tar.gz atrey:ftp/isolate/
	rsync isolate.1.html jw:/var/www/moe/
	ssh jw 'cd web && bin/release-prog isolate $(VERSION)'

.PHONY: all clean install install-doc release
@@ -0,0 +1,29 | |||||
|
|
1 | + isolate | ||
|
|
2 | + ======= | ||
|
|
3 | + | ||
|
|
4 | + Isolate is a sandbox built to safely run untrusted executables, | ||
|
|
5 | + offering them a limited-access environment and preventing them from | ||
|
|
6 | + affecting the host system. It takes advantage of features specific to | ||
|
|
7 | + the Linux kernel, like namespaces and control groups. | ||
|
|
8 | + | ||
|
|
9 | + Isolate was developed by Martin Mareš (<mj@ucw.cz>) and Bernard Blackham | ||
|
|
10 | + (<bernard@blackham.com.au>), who still maintain it. Several other people | ||
|
|
11 | + contributed patches for features and bug fixes (see Git history for a list). | ||
|
|
12 | + Thanks! | ||
|
|
13 | + | ||
|
|
14 | + Originally, Isolate was a part of the [Moe Contest Environment](http://www.ucw.cz/moe/), | ||
|
|
15 | + but it evolved into a separate project used by different | ||
|
|
16 | + contest systems, most prominently [CMS](https://github.com/cms-dev/cms). | ||
|
|
17 | + It now lives at [GitHub](https://github.com/ioi/isolate), | ||
|
|
18 | + where you can submit bug reports and feature requests. | ||
|
|
19 | + | ||
|
|
20 | + If you are interested in more details, please read Martin's | ||
|
|
21 | + and Bernard's [paper](http://mj.ucw.cz/papers/isolate.pdf) presented | ||
|
|
22 | + at the IOI Conference. Also, Isolate's [manual page](http://www.ucw.cz/moe/isolate.1.html) | ||
|
|
23 | + is available online. | ||
|
|
24 | + | ||
|
|
25 | + To compile Isolate, you need the headers for the libcap library | ||
|
|
26 | + (usually available in a libcap-dev package). | ||
|
|
27 | + | ||
|
|
28 | + You may need `a2x` (found in [AsciiDoc](http://www.methods.co.nz/asciidoc/a2x.1.html)) for building the manual. | ||
|
|
29 | + But if you only want the isolate binary, you can just run `make isolate`.
@@ -0,0 +1,1 | |||||
|
|
1 | + Examine the use of taskstats for measuring memory |
@@ -0,0 +1,327 | |||||
|
|
1 | + /* | ||
|
|
2 | + * Process Isolator -- Control Groups | ||
|
|
3 | + * | ||
|
|
4 | + * (c) 2012-2016 Martin Mares <mj@ucw.cz> | ||
|
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | ||
|
|
6 | + */ | ||
|
|
7 | + | ||
|
|
8 | + #include "isolate.h" | ||
|
|
9 | + | ||
|
|
10 | + #include <assert.h> | ||
|
|
11 | + #include <errno.h> | ||
|
|
12 | + #include <fcntl.h> | ||
|
|
13 | + #include <stdio.h> | ||
|
|
14 | + #include <stdlib.h> | ||
|
|
15 | + #include <string.h> | ||
|
|
16 | + #include <sys/stat.h> | ||
|
|
17 | + #include <unistd.h> | ||
|
|
18 | + | ||
|
|
/* Static description of one cgroup controller used by the sandbox. */
struct cg_controller_desc {
  const char *name;	/* Path component of the controller under the cgroup root */
  int optional;		/* Non-zero if failures to set up this controller are tolerated */
};
|
|
23 | + | ||
|
|
/*
 * Identifiers of the controllers we use; they double as indices into the
 * cg_controllers[] table. CG_PARENT is not a controller but a flag which can
 * be OR-ed with a controller to address the parent control group instead of
 * the per-box one (see cg_makepath()).
 */
typedef enum {
  CG_MEMORY = 0,
  CG_CPUACCT,
  CG_CPUSET,
  CG_NUM_CONTROLLERS,		/* Number of real controllers */
  CG_PARENT = 256,		/* Flag: refer to the parent cgroup */
} cg_controller;
|
|
31 | + | ||
|
|
32 | + static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = { | ||
|
|
33 | + [CG_MEMORY] = { "memory", 0 }, | ||
|
|
34 | + [CG_CPUACCT] = { "cpuacct", 0 }, | ||
|
|
35 | + [CG_CPUSET] = { "cpuset", 1 }, | ||
|
|
36 | + [CG_NUM_CONTROLLERS] = { NULL, 0 }, | ||
|
|
37 | + }; | ||
|
|
38 | + | ||
|
|
/*
 * Loop header iterating over all real controllers (0 .. CG_NUM_CONTROLLERS-1).
 * The macro declares the loop variable itself.
 */
#define FOREACH_CG_CONTROLLER(_controller)		\
  for (cg_controller (_controller) = 0;			\
       (_controller) < CG_NUM_CONTROLLERS;		\
       (_controller)++)
|
|
42 | + | ||
|
|
43 | + static const char * | ||
|
|
44 | + cg_controller_name(cg_controller c) | ||
|
|
45 | + { | ||
|
|
46 | + assert(c < CG_NUM_CONTROLLERS); | ||
|
|
47 | + return cg_controllers[c].name; | ||
|
|
48 | + } | ||
|
|
49 | + | ||
|
|
50 | + static int | ||
|
|
51 | + cg_controller_optional(cg_controller c) | ||
|
|
52 | + { | ||
|
|
53 | + assert(c < CG_NUM_CONTROLLERS); | ||
|
|
54 | + return cg_controllers[c].optional; | ||
|
|
55 | + } | ||
|
|
56 | + | ||
|
|
/* Names of the per-box and parent control groups, relative to a controller's root; set by cg_init() */
static char cg_name[256];
static char cg_parent_name[256];

/* Size of buffers holding values of cgroup attributes */
#define CG_BUFSIZE 1024
|
|
61 | + | ||
|
|
62 | + static void | ||
|
|
63 | + cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) | ||
|
|
64 | + { | ||
|
|
65 | + snprintf(buf, len, "%s/%s/%s/%s", | ||
|
|
66 | + cf_cg_root, | ||
|
|
67 | + cg_controller_name(c & ~CG_PARENT), | ||
|
|
68 | + (c & CG_PARENT) ? cg_parent_name : cg_name, | ||
|
|
69 | + attr); | ||
|
|
70 | + } | ||
|
|
71 | + | ||
|
|
72 | + static int | ||
|
|
73 | + cg_read(cg_controller controller, const char *attr, char *buf) | ||
|
|
74 | + { | ||
|
|
75 | + int result = 0; | ||
|
|
76 | + int maybe = 0; | ||
|
|
77 | + if (attr[0] == '?') | ||
|
|
78 | + { | ||
|
|
79 | + attr++; | ||
|
|
80 | + maybe = 1; | ||
|
|
81 | + } | ||
|
|
82 | + | ||
|
|
83 | + char path[256]; | ||
|
|
84 | + cg_makepath(path, sizeof(path), controller, attr); | ||
|
|
85 | + | ||
|
|
86 | + int fd = open(path, O_RDONLY); | ||
|
|
87 | + if (fd < 0) | ||
|
|
88 | + { | ||
|
|
89 | + if (maybe) | ||
|
|
90 | + goto fail; | ||
|
|
91 | + die("Cannot read %s: %m", path); | ||
|
|
92 | + } | ||
|
|
93 | + | ||
|
|
94 | + int n = read(fd, buf, CG_BUFSIZE); | ||
|
|
95 | + if (n < 0) | ||
|
|
96 | + { | ||
|
|
97 | + if (maybe) | ||
|
|
98 | + goto fail_close; | ||
|
|
99 | + die("Cannot read %s: %m", path); | ||
|
|
100 | + } | ||
|
|
101 | + if (n >= CG_BUFSIZE - 1) | ||
|
|
102 | + die("Attribute %s too long", path); | ||
|
|
103 | + if (n > 0 && buf[n-1] == '\n') | ||
|
|
104 | + n--; | ||
|
|
105 | + buf[n] = 0; | ||
|
|
106 | + | ||
|
|
107 | + if (verbose > 1) | ||
|
|
108 | + msg("CG: Read %s = <%s>\n", attr, buf); | ||
|
|
109 | + | ||
|
|
110 | + result = 1; | ||
|
|
111 | + fail_close: | ||
|
|
112 | + close(fd); | ||
|
|
113 | + fail: | ||
|
|
114 | + return result; | ||
|
|
115 | + } | ||
|
|
116 | + | ||
|
|
117 | + static void __attribute__((format(printf,3,4))) | ||
|
|
118 | + cg_write(cg_controller controller, const char *attr, const char *fmt, ...) | ||
|
|
119 | + { | ||
|
|
120 | + int maybe = 0; | ||
|
|
121 | + if (attr[0] == '?') | ||
|
|
122 | + { | ||
|
|
123 | + attr++; | ||
|
|
124 | + maybe = 1; | ||
|
|
125 | + } | ||
|
|
126 | + | ||
|
|
127 | + va_list args; | ||
|
|
128 | + va_start(args, fmt); | ||
|
|
129 | + | ||
|
|
130 | + char buf[CG_BUFSIZE]; | ||
|
|
131 | + int n = vsnprintf(buf, sizeof(buf), fmt, args); | ||
|
|
132 | + if (n >= CG_BUFSIZE) | ||
|
|
133 | + die("cg_write: Value for attribute %s is too long", attr); | ||
|
|
134 | + | ||
|
|
135 | + if (verbose > 1) | ||
|
|
136 | + msg("CG: Write %s = %s", attr, buf); | ||
|
|
137 | + | ||
|
|
138 | + char path[256]; | ||
|
|
139 | + cg_makepath(path, sizeof(path), controller, attr); | ||
|
|
140 | + | ||
|
|
141 | + int fd = open(path, O_WRONLY | O_TRUNC); | ||
|
|
142 | + if (fd < 0) | ||
|
|
143 | + { | ||
|
|
144 | + if (maybe) | ||
|
|
145 | + goto fail; | ||
|
|
146 | + else | ||
|
|
147 | + die("Cannot write %s: %m", path); | ||
|
|
148 | + } | ||
|
|
149 | + | ||
|
|
150 | + int written = write(fd, buf, n); | ||
|
|
151 | + if (written < 0) | ||
|
|
152 | + { | ||
|
|
153 | + if (maybe) | ||
|
|
154 | + goto fail_close; | ||
|
|
155 | + else | ||
|
|
156 | + die("Cannot set %s to %s: %m", path, buf); | ||
|
|
157 | + } | ||
|
|
158 | + if (written != n) | ||
|
|
159 | + die("Short write to %s (%d out of %d bytes)", path, written, n); | ||
|
|
160 | + | ||
|
|
161 | + fail_close: | ||
|
|
162 | + close(fd); | ||
|
|
163 | + fail: | ||
|
|
164 | + va_end(args); | ||
|
|
165 | + } | ||
|
|
166 | + | ||
|
|
167 | + void | ||
|
|
168 | + cg_init(void) | ||
|
|
169 | + { | ||
|
|
170 | + if (!cg_enable) | ||
|
|
171 | + return; | ||
|
|
172 | + | ||
|
|
173 | + if (!dir_exists(cf_cg_root)) | ||
|
|
174 | + die("Control group filesystem at %s not mounted", cf_cg_root); | ||
|
|
175 | + | ||
|
|
176 | + if (cf_cg_parent) | ||
|
|
177 | + { | ||
|
|
178 | + snprintf(cg_name, sizeof(cg_name), "%s/box-%d", cf_cg_parent, box_id); | ||
|
|
179 | + snprintf(cg_parent_name, sizeof(cg_parent_name), "%s", cf_cg_parent); | ||
|
|
180 | + } | ||
|
|
181 | + else | ||
|
|
182 | + { | ||
|
|
183 | + snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); | ||
|
|
184 | + strcpy(cg_parent_name, "."); | ||
|
|
185 | + } | ||
|
|
186 | + msg("Using control group %s under parent %s\n", cg_name, cg_parent_name); | ||
|
|
187 | + } | ||
|
|
188 | + | ||
|
|
189 | + void | ||
|
|
190 | + cg_prepare(void) | ||
|
|
191 | + { | ||
|
|
192 | + if (!cg_enable) | ||
|
|
193 | + return; | ||
|
|
194 | + | ||
|
|
195 | + struct stat st; | ||
|
|
196 | + char buf[CG_BUFSIZE]; | ||
|
|
197 | + char path[256]; | ||
|
|
198 | + | ||
|
|
199 | + FOREACH_CG_CONTROLLER(controller) | ||
|
|
200 | + { | ||
|
|
201 | + cg_makepath(path, sizeof(path), controller, ""); | ||
|
|
202 | + if (stat(path, &st) >= 0 || errno != ENOENT) | ||
|
|
203 | + { | ||
|
|
204 | + msg("Control group %s already exists, trying to empty it.\n", path); | ||
|
|
205 | + if (rmdir(path) < 0) | ||
|
|
206 | + die("Failed to reset control group %s: %m", path); | ||
|
|
207 | + } | ||
|
|
208 | + | ||
|
|
209 | + if (mkdir(path, 0777) < 0 && !cg_controller_optional(controller)) | ||
|
|
210 | + die("Failed to create control group %s: %m", path); | ||
|
|
211 | + } | ||
|
|
212 | + | ||
|
|
213 | + // If the cpuset module is enabled, set up allowed cpus and memory nodes. | ||
|
|
214 | + // If per-box configuration exists, use it; otherwise, inherit the settings | ||
|
|
215 | + // from the parent cgroup. | ||
|
|
216 | + struct cf_per_box *cf = cf_current_box(); | ||
|
|
217 | + if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.cpus", buf)) | ||
|
|
218 | + cg_write(CG_CPUSET, "cpuset.cpus", "%s", cf->cpus ? cf->cpus : buf); | ||
|
|
219 | + if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.mems", buf)) | ||
|
|
220 | + cg_write(CG_CPUSET, "cpuset.mems", "%s", cf->mems ? cf->mems : buf); | ||
|
|
221 | + } | ||
|
|
222 | + | ||
|
|
223 | + void | ||
|
|
224 | + cg_enter(void) | ||
|
|
225 | + { | ||
|
|
226 | + if (!cg_enable) | ||
|
|
227 | + return; | ||
|
|
228 | + | ||
|
|
229 | + msg("Entering control group %s\n", cg_name); | ||
|
|
230 | + | ||
|
|
231 | + FOREACH_CG_CONTROLLER(controller) | ||
|
|
232 | + { | ||
|
|
233 | + if (cg_controller_optional(controller)) | ||
|
|
234 | + cg_write(controller, "?tasks", "%d\n", (int) getpid()); | ||
|
|
235 | + else | ||
|
|
236 | + cg_write(controller, "tasks", "%d\n", (int) getpid()); | ||
|
|
237 | + } | ||
|
|
238 | + | ||
|
|
239 | + if (cg_memory_limit) | ||
|
|
240 | + { | ||
|
|
241 | + cg_write(CG_MEMORY, "memory.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); | ||
|
|
242 | + cg_write(CG_MEMORY, "?memory.memsw.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); | ||
|
|
243 | + cg_write(CG_MEMORY, "memory.max_usage_in_bytes", "0\n"); | ||
|
|
244 | + cg_write(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", "0\n"); | ||
|
|
245 | + } | ||
|
|
246 | + | ||
|
|
247 | + if (cg_timing) | ||
|
|
248 | + cg_write(CG_CPUACCT, "cpuacct.usage", "0\n"); | ||
|
|
249 | + } | ||
|
|
250 | + | ||
|
|
251 | + int | ||
|
|
252 | + cg_get_run_time_ms(void) | ||
|
|
253 | + { | ||
|
|
254 | + if (!cg_enable) | ||
|
|
255 | + return 0; | ||
|
|
256 | + | ||
|
|
257 | + char buf[CG_BUFSIZE]; | ||
|
|
258 | + cg_read(CG_CPUACCT, "cpuacct.usage", buf); | ||
|
|
259 | + unsigned long long ns = atoll(buf); | ||
|
|
260 | + return ns / 1000000; | ||
|
|
261 | + } | ||
|
|
262 | + | ||
|
|
263 | + void | ||
|
|
264 | + cg_stats(void) | ||
|
|
265 | + { | ||
|
|
266 | + if (!cg_enable) | ||
|
|
267 | + return; | ||
|
|
268 | + | ||
|
|
269 | + char buf[CG_BUFSIZE]; | ||
|
|
270 | + | ||
|
|
271 | + // Memory usage statistics | ||
|
|
272 | + unsigned long long mem=0, memsw=0; | ||
|
|
273 | + if (cg_read(CG_MEMORY, "?memory.max_usage_in_bytes", buf)) | ||
|
|
274 | + mem = atoll(buf); | ||
|
|
275 | + if (cg_read(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", buf)) | ||
|
|
276 | + { | ||
|
|
277 | + memsw = atoll(buf); | ||
|
|
278 | + if (memsw > mem) | ||
|
|
279 | + mem = memsw; | ||
|
|
280 | + } | ||
|
|
281 | + if (mem) | ||
|
|
282 | + meta_printf("cg-mem:%lld\n", mem >> 10); | ||
|
|
283 | + | ||
|
|
284 | + // OOM kill detection | ||
|
|
285 | + if (cg_read(CG_MEMORY, "?memory.oom_control", buf)) | ||
|
|
286 | + { | ||
|
|
287 | + int oom_killed = 0; | ||
|
|
288 | + char *s = buf; | ||
|
|
289 | + while (s) | ||
|
|
290 | + { | ||
|
|
291 | + if (sscanf(s, "oom_kill %d", &oom_killed) == 1 && oom_killed) | ||
|
|
292 | + { | ||
|
|
293 | + meta_printf("cg-oom-killed:1\n"); | ||
|
|
294 | + break; | ||
|
|
295 | + } | ||
|
|
296 | + s = strchr(s, '\n'); | ||
|
|
297 | + if (s) | ||
|
|
298 | + s++; | ||
|
|
299 | + } | ||
|
|
300 | + } | ||
|
|
301 | + } | ||
|
|
302 | + | ||
|
|
303 | + void | ||
|
|
304 | + cg_remove(void) | ||
|
|
305 | + { | ||
|
|
306 | + char buf[CG_BUFSIZE]; | ||
|
|
307 | + | ||
|
|
308 | + if (!cg_enable) | ||
|
|
309 | + return; | ||
|
|
310 | + | ||
|
|
311 | + FOREACH_CG_CONTROLLER(controller) | ||
|
|
312 | + { | ||
|
|
313 | + // The cgroup can be non-existent at this moment (e.g., --cleanup before the first --init) | ||
|
|
314 | + if (!cg_read(controller, "?tasks", buf)) | ||
|
|
315 | + continue; | ||
|
|
316 | + | ||
|
|
317 | + if (buf[0]) | ||
|
|
318 | + die("Some tasks left in controller %s of cgroup %s, failed to remove it", | ||
|
|
319 | + cg_controller_name(controller), cg_name); | ||
|
|
320 | + | ||
|
|
321 | + char path[256]; | ||
|
|
322 | + cg_makepath(path, sizeof(path), controller, ""); | ||
|
|
323 | + | ||
|
|
324 | + if (rmdir(path) < 0) | ||
|
|
325 | + die("Cannot remove control group %s: %m", path); | ||
|
|
326 | + } | ||
|
|
327 | + } |
@@ -0,0 +1,168 | |||||
|
|
1 | + /* | ||
|
|
2 | + * Process Isolator -- Configuration File | ||
|
|
3 | + * | ||
|
|
4 | + * (c) 2016 Martin Mares <mj@ucw.cz> | ||
|
|
5 | + */ | ||
|
|
6 | + | ||
|
|
7 | + #include "isolate.h" | ||
|
|
8 | + | ||
|
|
9 | + #include <errno.h> | ||
|
|
10 | + #include <stdio.h> | ||
|
|
11 | + #include <stdlib.h> | ||
|
|
12 | + #include <string.h> | ||
|
|
13 | + | ||
|
|
/* Size of the buffer for a single line of the config file */
#define MAX_LINE_LEN 1024

/* Values of top-level configuration items, named after their keys */
char *cf_box_root;
char *cf_cg_root;
char *cf_cg_parent;
int cf_first_uid;
int cf_first_gid;
int cf_num_boxes;

/* Number of config file lines read so far, used in error messages */
static int line_number;

/* Head of the linked list of per-box configurations */
static struct cf_per_box *per_box_configs;
|
|
25 | + | ||
|
|
26 | + static void NONRET | ||
|
|
27 | + cf_err(char *msg) | ||
|
|
28 | + { | ||
|
|
29 | + die("Error in config file, line %d: %s", line_number, msg); | ||
|
|
30 | + } | ||
|
|
31 | + | ||
|
|
/* Convert a string-valued item: return a copy of val made by xstrdup(). */
static char *
cf_string(char *val)
{
  char *copy = xstrdup(val);
  return copy;
}
|
|
37 | + | ||
|
|
/*
 * Convert an integer-valued item. The whole string must be a decimal number
 * accepted by strtol() and it must fit in an int; otherwise a config file
 * error is reported.
 */
static int
cf_int(char *val)
{
  char *end = NULL;
  errno = 0;
  long int parsed = strtol(val, &end, 10);

  // Reject conversion errors, empty input, and trailing garbage
  int malformed = (errno != 0) || (end == val) || (end && *end);
  if (malformed)
    cf_err("Invalid number");

  // Check that narrowing to int does not change the value
  int narrowed = (int) parsed;
  if ((long int) narrowed != parsed)
    cf_err("Number out of range");

  return narrowed;
}
|
|
50 | + | ||
|
|
51 | + static void | ||
|
|
52 | + cf_entry_toplevel(char *key, char *val) | ||
|
|
53 | + { | ||
|
|
54 | + if (!strcmp(key, "box_root")) | ||
|
|
55 | + cf_box_root = cf_string(val); | ||
|
|
56 | + else if (!strcmp(key, "cg_root")) | ||
|
|
57 | + cf_cg_root = cf_string(val); | ||
|
|
58 | + else if (!strcmp(key, "cg_parent")) | ||
|
|
59 | + cf_cg_parent = cf_string(val); | ||
|
|
60 | + else if (!strcmp(key, "first_uid")) | ||
|
|
61 | + cf_first_uid = cf_int(val); | ||
|
|
62 | + else if (!strcmp(key, "first_gid")) | ||
|
|
63 | + cf_first_gid = cf_int(val); | ||
|
|
64 | + else if (!strcmp(key, "num_boxes")) | ||
|
|
65 | + cf_num_boxes = cf_int(val); | ||
|
|
66 | + else | ||
|
|
67 | + cf_err("Unknown configuration item"); | ||
|
|
68 | + } | ||
|
|
69 | + | ||
|
|
70 | + static void | ||
|
|
71 | + cf_entry_compound(char *key, char *subkey, char *val) | ||
|
|
72 | + { | ||
|
|
73 | + if (strncmp(key, "box", 3)) | ||
|
|
74 | + cf_err("Unknown configuration section"); | ||
|
|
75 | + int box_id = cf_int(key + 3); | ||
|
|
76 | + struct cf_per_box *c = cf_per_box(box_id); | ||
|
|
77 | + | ||
|
|
78 | + if (!strcmp(subkey, "cpus")) | ||
|
|
79 | + c->cpus = cf_string(val); | ||
|
|
80 | + else if (!strcmp(subkey, "mems")) | ||
|
|
81 | + c->mems = cf_string(val); | ||
|
|
82 | + else | ||
|
|
83 | + cf_err("Unknown per-box configuration item"); | ||
|
|
84 | + } | ||
|
|
85 | + | ||
|
|
/*
 * Dispatch one configuration entry: keys containing a dot are split at the
 * first dot (in place) and handled as compound items, all others as
 * top-level items.
 */
static void
cf_entry(char *key, char *val)
{
  char *sep = strchr(key, '.');
  if (sep)
    {
      *sep = 0;
      cf_entry_compound(key, sep + 1, val);
      return;
    }

  cf_entry_toplevel(key, val);
}
|
|
98 | + | ||
|
|
99 | + static void | ||
|
|
100 | + cf_check(void) | ||
|
|
101 | + { | ||
|
|
102 | + if (!cf_box_root || | ||
|
|
103 | + !cf_cg_root || | ||
|
|
104 | + !cf_first_uid || | ||
|
|
105 | + !cf_first_gid || | ||
|
|
106 | + !cf_num_boxes) | ||
|
|
107 | + cf_err("Configuration is not complete"); | ||
|
|
108 | + } | ||
|
|
109 | + | ||
|
|
110 | + void | ||
|
|
111 | + cf_parse(void) | ||
|
|
112 | + { | ||
|
|
113 | + FILE *f = fopen(CONFIG_FILE, "r"); | ||
|
|
114 | + if (!f) | ||
|
|
115 | + die("Cannot open %s: %m", CONFIG_FILE); | ||
|
|
116 | + | ||
|
|
117 | + char line[MAX_LINE_LEN]; | ||
|
|
118 | + while (fgets(line, sizeof(line), f)) | ||
|
|
119 | + { | ||
|
|
120 | + line_number++; | ||
|
|
121 | + char *nl = strchr(line, '\n'); | ||
|
|
122 | + if (!nl) | ||
|
|
123 | + cf_err("Line not terminated or too long"); | ||
|
|
124 | + *nl = 0; | ||
|
|
125 | + | ||
|
|
126 | + if (!line[0] || line[0] == '#') | ||
|
|
127 | + continue; | ||
|
|
128 | + | ||
|
|
129 | + char *s = line; | ||
|
|
130 | + while (*s && *s != ' ' && *s != '\t' && *s != '=') | ||
|
|
131 | + s++; | ||
|
|
132 | + while (*s == ' ' || *s == '\t') | ||
|
|
133 | + *s++ = 0; | ||
|
|
134 | + if (*s != '=') | ||
|
|
135 | + cf_err("Syntax error, expecting key=value"); | ||
|
|
136 | + *s++ = 0; | ||
|
|
137 | + while (*s == ' ' || *s == '\t') | ||
|
|
138 | + *s++ = 0; | ||
|
|
139 | + | ||
|
|
140 | + cf_entry(line, s); | ||
|
|
141 | + } | ||
|
|
142 | + | ||
|
|
143 | + fclose(f); | ||
|
|
144 | + cf_check(); | ||
|
|
145 | + } | ||
|
|
146 | + | ||
|
|
147 | + struct cf_per_box * | ||
|
|
148 | + cf_per_box(int box_id) | ||
|
|
149 | + { | ||
|
|
150 | + struct cf_per_box *c; | ||
|
|
151 | + | ||
|
|
152 | + for (c = per_box_configs; c; c = c->next) | ||
|
|
153 | + if (c->box_id == box_id) | ||
|
|
154 | + return c; | ||
|
|
155 | + | ||
|
|
156 | + c = xmalloc(sizeof(*c)); | ||
|
|
157 | + memset(c, 0, sizeof(*c)); | ||
|
|
158 | + c->next = per_box_configs; | ||
|
|
159 | + per_box_configs = c; | ||
|
|
160 | + c->box_id = box_id; | ||
|
|
161 | + return c; | ||
|
|
162 | + } | ||
|
|
163 | + | ||
|
|
164 | + struct cf_per_box * | ||
|
|
165 | + cf_current_box(void) | ||
|
|
166 | + { | ||
|
|
167 | + return cf_per_box(box_id); | ||
|
|
168 | + } |
@@ -0,0 +1,24 | |||||
|
|
1 | + # This is a configuration file for Isolate | ||
|
|
2 | + | ||
|
|
3 | + # All sandboxes are created under this directory. | ||
|
|
4 | + # To avoid symlink attacks, this directory and all its ancestors | ||
|
|
5 | + # must be writeable only to root. | ||
|
|
6 | + box_root = /var/local/lib/isolate | ||
|
|
7 | + | ||
|
|
8 | + # Root of the control group hierarchy | ||
|
|
9 | + cg_root = /sys/fs/cgroup | ||
|
|
10 | + | ||
|
|
11 | + # If the following variable is defined, the per-box cgroups | ||
|
|
12 | + # are created as sub-groups of the named cgroup | ||
|
|
13 | + #cg_parent = boxes | ||
|
|
14 | + | ||
|
|
15 | + # Block of UIDs and GIDs reserved for sandboxes | ||
|
|
16 | + first_uid = 60000 | ||
|
|
17 | + first_gid = 60000 | ||
|
|
18 | + num_boxes = 1000 | ||
|
|
19 | + | ||
|
|
20 | + # Per-box settings of the set of allowed CPUs and NUMA nodes | ||
|
|
21 | + # (see linux/Documentation/cgroups/cpusets.txt for precise syntax) | ||
|
|
22 | + | ||
|
|
23 | + #box0.cpus = 4-7 | ||
|
|
24 | + #box0.mems = 1 |
@@ -0,0 +1,224 | |||||
|
|
1 | + #!/bin/sh | ||
|
|
2 | + # | ||
|
|
3 | + # Identifies potential sources of issues when using isolate. | ||
|
|
4 | + # | ||
|
|
5 | + # (c) 2017 Bernard Blackham <bernard@blackham.com.au> | ||
|
|
6 | + # | ||
|
|
7 | + | ||
|
|
# Print the help text to stderr and terminate the script with exit status 2.
# Called for --help as well as for any error in the command line.
usage() {
	cat <<EOT >&2
Usage: $0 [-q|--quiet] [-e|--execute]

Use this script to identify sources of run-time variability and other issues on
Linux machines which may affect isolate. If --execute is not specified, the
recommended actions are written to stdout as an executable shell script,
otherwise, using --execute will attempt to make changes to make the system
behave more deterministically. The changes performed by --execute persist only
until a reboot. To persist across reboots, the standard output from this script
should be added to /etc/rc.local or some other script that is run on each boot.
Alternately, you could add the following line to /etc/rc.local to automatically
apply these changes on boot, but use this with caution as not all issues can
be resolved in this way.

    isolate-check-environment --quiet --execute

The exit status of this script will be 0 if all checks pass, or 1 if some
checks have failed.

Note that there are more strategies to reduce run-time variability further.
See the man page of isolate for details under REPRODUCIBILITY.
EOT
	exit 2
}
|
|
33 | + | ||
|
|
# Parse the command line. getopt normalizes the options; any error in them,
# an unknown option, or a left-over non-option argument ends in usage().
quiet=
execute=
if ! args=$(getopt -o "ehq" --long "execute,help,quiet" -- "$@") ; then
	usage
fi
eval set -- "$args"
while true ; do
	case "$1" in
		--)
			shift
			break
			;;
		-e|--execute)
			execute=1
			;;
		-q|--quiet)
			quiet=1
			;;
		-h|--help)
			usage
			;;
		*)
			usage
			;;
	esac
	shift
done
if [ -n "$*" ] ; then
	usage
fi
|
|
49 | + | ||
|
|
# Some helper boilerplate machinery.
# exit_status becomes 1 as soon as any check fails or is dubious.
exit_status=0
# Terminal escape sequences used to colour the check results. tput fails
# noisily when TERM is unset or unknown (e.g. when run from /etc/rc.local),
# so its errors are discarded and the variables are simply left empty.
red=$(tput setaf 1 2>/dev/null)
green=$(tput setaf 2 2>/dev/null)
yellow=$(tput setaf 3 2>/dev/null)
normal=$(tput sgr0 2>/dev/null)
|
|
56 | + | ||
|
|
# Succeed (status 0) if --quiet was given, i.e. if $quiet is non-empty.
quiet() {
	test -n "$quiet"
}
|
|
61 | + | ||
|
|
# Unless being quiet, print all arguments to stderr, prefixed by "WARNING:".
warn() {
	if ! quiet ; then
		echo WARNING: "$*" >&2
	fi
}
|
|
66 | + | ||
|
|
# Report a problem and its remedy.
#   $1 - description of the problem, printed to stderr as a warning
#   $2 - shell command which fixes it
# With --execute, the command is run by "sh -c" (and echoed with a "+ "
# prefix unless being quiet); otherwise it is printed to stdout unless being
# quiet. The command is always expanded quoted, so that it is printed exactly
# as given, without word splitting or pathname expansion.
action() {
	# warn checks for quiet mode itself
	warn "$1"
	if [ -n "$execute" ] ; then
		quiet || printf '%s\n' "+ $2"
		sh -c "$2"
	else
		quiet || printf '%s\n' "$2"
	fi
}
|
|
78 | + | ||
|
|
# Announce the start of a check on stderr ("Checking for <what> ... ", with
# no newline) and remember in print_check_status that its result has not been
# printed yet. Does nothing when being quiet.
# printf is used because "echo -n" is not portable among /bin/sh
# implementations; "$*" joins all arguments into a single string.
print_start_check() {
	quiet && return
	print_check_status=1
	printf 'Checking for %s ... ' "$*" >&2
}
|
|
84 | + | ||
|
|
# Record a failed check in exit_status and, unless being quiet, print a red
# FAIL if the result of the current check has not been printed yet.
print_fail() {
	exit_status=1
	if quiet ; then
		return
	fi
	if [ -n "$print_check_status" ] ; then
		echo "${red}FAIL${normal}" >&2
	fi
	print_check_status=
}
|
|
91 | + | ||
|
|
# Record a dubious check in exit_status and, unless being quiet, print a
# yellow CAUTION if the result of the current check has not been printed yet.
print_dubious() {
	exit_status=1
	if quiet ; then
		return
	fi
	if [ -n "$print_check_status" ] ; then
		echo "${yellow}CAUTION${normal}" >&2
	fi
	print_check_status=
}
|
|
98 | + | ||
|
|
# Unless being quiet, print SKIPPED if the result of the current check has
# not been printed yet. The exit status of the script is not affected.
print_skipped() {
	if quiet ; then
		return
	fi
	if [ -n "$print_check_status" ] ; then
		echo "SKIPPED (not detected)" >&2
	fi
	print_check_status=
}
|
|
104 | + | ||
|
|
# Finish a check: unless being quiet, print a green PASS if no other result
# has been printed since print_start_check.
print_finish() {
	if quiet ; then
		return
	fi
	if [ -n "$print_check_status" ] ; then
		echo "${green}PASS${normal}" >&2
	fi
	print_check_status=
}
|
|
110 | + | ||
|
|
# Check that the cgroup controller named by $1 is available, i.e. that
# /sys/fs/cgroup/<controller>/tasks exists.
cgroup_check() {
	local cgroup
	cgroup=$1
	print_start_check "cgroup support for $cgroup"
	if [ ! -f "/sys/fs/cgroup/$cgroup/tasks" ] ; then
		print_dubious
		warn "the $cgroup is not present. isolate --cg cannot be used."
	fi
	print_finish
}
for controller in memory cpuacct cpuset ; do
	cgroup_check "$controller"
done
|
|
124 | + | ||
|
|
# Check that swap is either disabled or accounted for by the memory cgroup.
swap_check() {
	local swaps
	print_start_check "swap"
	swaps=$(swapon --noheadings)

	# If swap is disabled, there is nothing to worry about.
	if [ -z "$swaps" ] ; then
		print_finish
		return
	fi

	# Swap is enabled. We had better have the memsw support in the memory
	# cgroup.
	if [ -f "/sys/fs/cgroup/memory/memory.memsw.usage_in_bytes" ] ; then
		print_dubious
		warn "swap is enabled, and although accounted for, may still give run-time variability under memory pressure."
	else
		print_fail
		action \
			"swap is enabled, but swap accounting is not. isolate will not be able to enforce memory limits." \
			"swapoff -a"
	fi
	print_finish
}
swap_check
|
|
147 | + | ||
|
|
# Check that CPU frequency scaling is disabled, i.e. that every
# scaling_governor file found under /sys/devices/system/cpu/cpufreq/ says
# "performance". The check is reported as skipped if no such file exists.
cpufreq_check() {
	print_start_check "CPU frequency scaling"
	local anycpus policy cpufreq_file
	anycpus=
	# Ensure cpufreq governor is set to performance on all CPUs.
	# Errors of find are discarded: the directory is missing on machines
	# without cpufreq support, which is reported as SKIPPED below.
	for cpufreq_file in $(find /sys/devices/system/cpu/cpufreq/ -name scaling_governor 2>/dev/null) ; do
		policy=$(cat "$cpufreq_file")
		if [ "$policy" != "performance" ] ; then
			print_fail
			action \
				"cpufreq governor set to '$policy', but 'performance' would be better" \
				"echo performance > $cpufreq_file"
		fi
		anycpus=1
	done
	[ -z "$anycpus" ] && print_skipped
	print_finish
}
cpufreq_check
|
|
168 | + | ||
|
|
# Check that address space layout randomisation is disabled, i.e. that
# /proc/sys/kernel/randomize_va_space contains 0. The check is reported as
# skipped if the file cannot be read.
aslr_check() {
	local val
	print_start_check "kernel address space randomisation"
	if ! val=$(cat /proc/sys/kernel/randomize_va_space 2>/dev/null) ; then
		print_skipped
	elif [ "$val" -ne 0 ] ; then
		print_fail
		action \
			"address space randomisation is enabled." \
			"echo 0 > /proc/sys/kernel/randomize_va_space"
	fi
	print_finish
}
aslr_check
|
|
186 | + | ||
|
|
# Check that transparent huge-pages are disabled, as this leads to
# non-determinism depending on whether the kernel can allocate 2 MiB pages or
# not. Settings whose files cannot be read are silently passed over.
thp_check() {
	local thp_dir knob val problem
	thp_dir=/sys/kernel/mm/transparent_hugepage
	print_start_check "transparent hugepage support"

	# Both of these files must have "[never]" selected.
	for knob in enabled defrag ; do
		if val=$(cat "$thp_dir/$knob" 2>/dev/null) ; then
			case $val in
				*'[never]'*)
					;;
				*)
					if [ "$knob" = enabled ] ; then
						problem="transparent hugepages are enabled."
					else
						problem="transparent hugepage defrag is enabled."
					fi
					print_fail
					action "$problem" "echo never > $thp_dir/$knob"
					;;
			esac
		fi
	done

	# The defrag setting of khugepaged is a number, which must be zero.
	if val=$(cat "$thp_dir/khugepaged/defrag" 2>/dev/null) ; then
		if [ "$val" -ne 0 ] ; then
			print_fail
			action \
				"khugepaged defrag is enabled." \
				"echo 0 > $thp_dir/khugepaged/defrag"
		fi
	fi
	print_finish
}
thp_check
|
|
222 | + | ||
|
|
223 | + | ||
|
|
# Exit with 0 if all checks passed, 1 if any of them failed or was dubious.
exit "$exit_status"
@@ -0,0 +1,348 | |||||
|
|
1 | + ISOLATE(1) | ||
|
|
2 | + ========== | ||
|
|
3 | + | ||
|
|
4 | + NAME | ||
|
|
5 | + ---- | ||
|
|
6 | + isolate - Isolate a process using Linux Containers | ||
|
|
7 | + | ||
|
|
8 | + SYNOPSIS | ||
|
|
9 | + -------- | ||
|
|
10 | + *isolate* 'options' *--init* | ||
|
|
11 | + | ||
|
|
12 | + *isolate* 'options' *--run* +--+ 'program' 'arguments' | ||
|
|
13 | + | ||
|
|
14 | + *isolate* 'options' *--cleanup* | ||
|
|
15 | + | ||
|
|
16 | + DESCRIPTION | ||
|
|
17 | + ----------- | ||
|
|
18 | + Run 'program' within a sandbox, so that it cannot communicate with the | ||
|
|
19 | + outside world and its resource consumption is limited. This can be used | ||
|
|
20 | + for example in a programming contest to run untrusted programs submitted | ||
|
|
21 | + by contestants in a controlled environment. | ||
|
|
22 | + | ||
|
|
23 | + The sandbox is used in the following way: | ||
|
|
24 | + | ||
|
|
25 | + * Run *isolate --init*, which initializes the sandbox, creates its working directory and | ||
|
|
26 | + prints its name to the standard output. Fails if the sandbox already existed. | ||
|
|
27 | + | ||
|
|
28 | + * Populate the directory with the executable file of the program and its | ||
|
|
29 | + input files. | ||
|
|
30 | + | ||
|
|
31 | + * Call *isolate --run* to run the program. A single line describing the | ||
|
|
32 | + status of the program is written to the standard error stream. | ||
|
|
33 | + | ||
|
|
34 | + * Fetch the output of the program from the directory. | ||
|
|
35 | + | ||
|
|
36 | + * Run *isolate --cleanup* to remove temporary files. Does nothing if the sandbox | ||
|
|
37 | + was already cleaned up. | ||
|
|
38 | + | ||
|
|
39 | + Please note that by default, the program is not allowed to start multiple | ||
|
|
40 | + processes or threads. If you need that, turn on the control group mode | ||
|
|
41 | + (see below). | ||
|
|
42 | + | ||
|
|
43 | + OPTIONS | ||
|
|
44 | + ------- | ||
|
|
45 | + *-M, --meta=*'file':: | ||
|
|
46 | + Output meta-data on the execution of the program to a given file. | ||
|
|
47 | + See below for syntax of the meta-files. | ||
|
|
48 | + | ||
|
|
49 | + *-m, --mem=*'size':: | ||
|
|
50 | + Limit address space of the program to 'size' kilobytes. If more processes | ||
|
|
51 | + are allowed, this applies to each of them separately. | ||
|
|
52 | + | ||
|
|
53 | + *-t, --time=*'time':: | ||
|
|
54 | + Limit run time of the program to 'time' seconds. Fractional numbers are allowed. | ||
|
|
55 | + Time in which the OS assigns the processor to different tasks is not counted. | ||
|
|
56 | + | ||
|
|
57 | + *-w, --wall-time=*'time':: | ||
|
|
58 | + Limit wall-clock time to 'time' seconds. Fractional values are allowed. | ||
|
|
59 | + This clock measures the time from the start of the program to its exit, | ||
|
|
60 | + so it does not stop when the program has lost the CPU or when it is waiting | ||
|
|
61 | + for an external event. We recommend using *--time* as the main limit, | ||
|
|
62 | + but set *--wall-time* to a much higher value as a precaution against | ||
|
|
63 | + sleeping programs. | ||
|
|
64 | + | ||
|
|
65 | + *-x, --extra-time=*'time':: | ||
|
|
66 | + When a time limit is exceeded, wait for extra 'time' seconds before | ||
|
|
67 | + killing the program. This has the advantage that the real execution time | ||
|
|
68 | + is reported, even though it slightly exceeds the limit. Fractional | ||
|
|
69 | + numbers are again allowed. | ||
|
|
70 | + | ||
|
|
71 | + *-b, --box-id=*'id':: | ||
|
|
72 | + When you run multiple sandboxes in parallel, you have to assign unique | ||
|
|
73 | + IDs to them by this option. See the discussion on UIDs in the INSTALLATION | ||
|
|
74 | + section. The ID defaults to 0. | ||
|
|
75 | + | ||
|
|
76 | + *-k, --stack=*'size':: | ||
|
|
77 | + Limit process stack to 'size' kilobytes. By default, the whole address | ||
|
|
78 | + space is available for the stack, but it is subject to the *--mem* limit. | ||
|
|
79 | + | ||
|
|
80 | + *-f, --fsize=*'size':: | ||
|
|
81 | + Limit size of files created (or modified) by the program to 'size' kilobytes. | ||
|
|
82 | + In most cases, it is better to restrict overall disk usage by a disk quota | ||
|
|
83 | + (see below). This option can help in cases when quotas are not enabled | ||
|
|
84 | + on the underlying filesystem. | ||
|
|
85 | + | ||
|
|
86 | + *-q, --quota=*'blocks'*,*'inodes':: | ||
|
|
87 | + Set disk quota to a given number of blocks and inodes. This requires the | ||
|
|
88 | + filesystem to be mounted with support for quotas. Please note that this | ||
|
|
89 | + currently works only on the ext family of filesystems (other filesystems | ||
|
|
90 | + use other interfaces for setting quotas). | ||
|
|
91 | + | ||
|
|
92 | + *-i, --stdin=*'file':: | ||
|
|
93 | + Redirect standard input from 'file'. The 'file' has to be accessible | ||
|
|
94 | + inside the sandbox. Otherwise, standard input is inherited from the | ||
|
|
95 | + parent process. | ||
|
|
96 | + | ||
|
|
97 | + *-o, --stdout=*'file':: | ||
|
|
98 | + Redirect standard output to 'file'. The 'file' has to be accessible | ||
|
|
99 | + inside the sandbox. Otherwise, standard output is inherited from the | ||
|
|
100 | + parent process and the sandbox manager does not write anything to it. | ||
|
|
101 | + | ||
|
|
102 | + *-r, --stderr=*'file':: | ||
|
|
103 | + Redirect standard error output to 'file'. The 'file' has to be accessible | ||
|
|
104 | + inside the sandbox. Otherwise, standard error output is inherited from the | ||
|
|
105 | + parent process. See also *--stderr-to-stdout*. | ||
|
|
106 | + | ||
|
|
107 | + *--stderr-to-stdout*:: | ||
|
|
108 | + Redirect standard error output to standard output. This is performed after | ||
|
|
109 | + the standard output is redirected by *--stdout*. Mutually exclusive with *--stderr*. | ||
|
|
110 | + | ||
|
|
111 | + *-c, --chdir=*'dir':: | ||
|
|
112 | + Change directory to 'dir' before executing the program. This path must be | ||
|
|
113 | + relative to the root of the sandbox. | ||
|
|
114 | + | ||
|
|
115 | + *-p, --processes*[*=*'max']:: | ||
|
|
116 | + Permit the program to create up to 'max' processes and/or threads. Please | ||
|
|
117 | + keep in mind that time and memory limits do not work with multiple processes | ||
|
|
118 | + unless you enable the control group mode. If 'max' is not given, an arbitrary | ||
|
|
119 | + number of processes can be run. By default, only one process is permitted. | ||
|
|
120 | + | ||
|
|
121 | + *--share-net*:: | ||
|
|
122 | + By default, isolate creates a new network namespace for its child process. | ||
|
|
123 | + This namespace contains no network devices except for a per-namespace loopback. | ||
|
|
124 | + This prevents the program from communicating with the outside world. If you want | ||
|
|
125 | + to permit communication, you can use this switch to keep the child process | ||
|
|
126 | + in parent's network namespace. | ||
|
|
127 | + | ||
|
|
128 | + *--inherit-fds*:: | ||
|
|
129 | + By default, isolate closes all file descriptors passed from its parent | ||
|
|
130 | + except for descriptors 0, 1, and 2. | ||
|
|
131 | + This prevents unintentional descriptor leaks. In some cases, passing extra | ||
|
|
132 | + descriptors to the sandbox can be desirable, so you can use this switch | ||
|
|
133 | + to make them survive. | ||
|
|
134 | + | ||
|
|
135 | + *-v, --verbose*:: | ||
|
|
136 | + Tell the sandbox manager to be verbose and report on what is going on. | ||
|
|
137 | + Using *-v* multiple times produces even more jabber. | ||
|
|
138 | + | ||
|
|
139 | + *-s, --silent*:: | ||
|
|
140 | + Tell the sandbox manager to stay silent. No status messages are printed | ||
|
|
141 | + to stderr except for fatal errors of the sandbox itself. The combination of | ||
|
|
142 | + *--verbose* and *--silent* has an undefined effect. | ||
|
|
143 | + | ||
|
|
144 | + ENVIRONMENT RULES | ||
|
|
145 | + ----------------- | ||
|
|
146 | + UNIX processes normally inherit all environment variables from their parent. The | ||
|
|
147 | + sandbox however passes only those variables which are explicitly requested by | ||
|
|
148 | + environment rules: | ||
|
|
149 | + | ||
|
|
150 | + *-E, --env=*'var':: | ||
|
|
151 | + Inherit the variable 'var' from the parent. | ||
|
|
152 | + | ||
|
|
153 | + *-E, --env=*'var'*=*'value':: | ||
|
|
154 | + Set the variable 'var' to 'value'. When the 'value' is empty, the | ||
|
|
155 | + variable is removed from the environment. | ||
|
|
156 | + | ||
|
|
157 | + *-e, --full-env*:: | ||
|
|
158 | + Inherit all variables from the parent. | ||
|
|
159 | + | ||
|
|
160 | + The rules are applied in the order in which they were given, except for | ||
|
|
161 | + *--full-env*, which is applied first. | ||
|
|
162 | + | ||
|
|
163 | + The list of rules is automatically initialized with *-ELIBC_FATAL_STDERR_=1*. | ||
|
|
164 | + | ||
|
|
165 | + DIRECTORY RULES | ||
|
|
166 | + --------------- | ||
|
|
167 | + The sandboxed process gets its own filesystem namespace, which contains only subtrees | ||
|
|
168 | + requested by directory rules: | ||
|
|
169 | + | ||
|
|
170 | + *-d, --dir=*'in'*=*'out'[*:*'options']:: | ||
|
|
171 | + Bind the directory 'out' as seen by the caller to the path 'in' inside the sandbox. | ||
|
|
172 | + If there already was a directory rule for 'in', it is replaced. | ||
|
|
173 | + | ||
|
|
174 | + *-d, --dir=*'dir'[*:*'options']:: | ||
|
|
175 | + Bind the directory +/+'dir' to 'dir' inside the sandbox. | ||
|
|
176 | + If there already was a directory rule for 'dir', it is replaced. | ||
|
|
177 | + | ||
|
|
178 | + *-d, --dir=*'in'*=*:: | ||
|
|
179 | + Remove a directory rule for the path 'in' inside the sandbox. | ||
|
|
180 | + | ||
|
|
181 | + By default, all directories are bound read-only and restricted (no devices, | ||
|
|
182 | + no setuid binaries). This behavior can be modified using the 'options': | ||
|
|
183 | + | ||
|
|
184 | + *rw*:: | ||
|
|
185 | + Allow read-write access. | ||
|
|
186 | + | ||
|
|
187 | + *dev*:: | ||
|
|
188 | + Allow access to character and block devices. | ||
|
|
189 | + | ||
|
|
190 | + *noexec*:: | ||
|
|
191 | + Disallow execution of binaries. | ||
|
|
192 | + | ||
|
|
193 | + *maybe*:: | ||
|
|
194 | + Silently ignore the rule if the directory to be bound does not exist. | ||
|
|
195 | + | ||
|
|
196 | + *fs*:: | ||
|
|
197 | + Instead of binding a directory, mount a device-less filesystem called 'in'. | ||
|
|
198 | + For example, this can be 'proc' or 'sysfs'. | ||
|
|
199 | + | ||
|
|
200 | + Unless *--no-default-dirs* is specified, the default set of directory rules binds +/bin+, | ||
|
|
201 | + +/dev+ (with devices allowed), +/lib+, +/lib64+ (if it exists), and +/usr+. It also binds | ||
|
|
202 | + the working directory to +/box+ (read-write) and mounts the proc filesystem at +/proc+. | ||
|
|
203 | + | ||
|
|
204 | + *-D, --no-default-dirs*:: | ||
|
|
205 | + Do not bind the default set of directories. Care has to be taken to specify | ||
|
|
206 | + the correct set of rules (using *--dir*) for the executed program to run | ||
|
|
207 | + correctly. In particular, +/box+ has to be bound. | ||
|
|
208 | + | ||
|
|
209 | + CONTROL GROUPS | ||
|
|
210 | + -------------- | ||
|
|
211 | + Isolate can make use of system control groups provided by the kernel | ||
|
|
212 | + to constrain programs consisting of multiple processes. Please note | ||
|
|
213 | + that this feature needs special system setup described in the INSTALLATION | ||
|
|
214 | + section. | ||
|
|
215 | + | ||
|
|
216 | + *--cg*:: | ||
|
|
217 | + Enable use of control groups. This should be specified with *--init*, | ||
|
|
218 | + *--run* and *--cleanup*. | ||
|
|
219 | + | ||
|
|
220 | + *--cg-mem=*'size':: | ||
|
|
221 | + Limit total memory usage by the whole control group to 'size' kilobytes. | ||
|
|
222 | + This should be specified with *--run*. | ||
|
|
223 | + | ||
|
|
224 | + *--cg-timing*:: | ||
|
|
225 | + Use control groups for timing, so that the *--time* switch affects the | ||
|
|
226 | + total run time of all processes and threads in the control group. | ||
|
|
227 | + This should be specified with *--run*. | ||
|
|
228 | + This option is turned on by default, use *--no-cg-timing* to turn off. | ||
|
|
229 | + | ||
|
|
230 | + META-FILES | ||
|
|
231 | + ---------- | ||
|
|
232 | + The meta-file contains miscellaneous meta-information on execution of the | ||
|
|
233 | + program within the sandbox. It is a textual file consisting of lines | ||
|
|
234 | + of format 'key'*:*'value'. The following keys are defined: | ||
|
|
235 | + | ||
|
|
236 | + *cg-mem*:: | ||
|
|
237 | + When control groups are enabled, this is the total memory use | ||
|
|
238 | + by the whole control group (in kilobytes). | ||
|
|
239 | + *cg-oom-killed*:: | ||
|
|
240 | + Present when the program was killed by the out-of-memory killer | ||
|
|
241 | + (e.g., because it has exceeded the memory limit of its control group). | ||
|
|
242 | + This is reported only on Linux 4.13 and later. | ||
|
|
243 | + *csw-forced*:: | ||
|
|
244 | + Number of context switches forced by the kernel. | ||
|
|
245 | + *csw-voluntary*:: | ||
|
|
246 | + Number of context switches caused by the process giving up the CPU | ||
|
|
247 | + voluntarily. | ||
|
|
248 | + *exitcode*:: | ||
|
|
249 | + The program has exited normally with this exit code. | ||
|
|
250 | + *exitsig*:: | ||
|
|
251 | + The program has exited after receiving this fatal signal. | ||
|
|
252 | + *killed*:: | ||
|
|
253 | + Present when the program was terminated by the sandbox | ||
|
|
254 | + (e.g., because it has exceeded the time limit). | ||
|
|
255 | + *max-rss*:: | ||
|
|
256 | + Maximum resident set size of the process (in kilobytes). | ||
|
|
257 | + *message*:: | ||
|
|
258 | + Status message, not intended for machine processing. | ||
|
|
259 | + E.g., "Time limit exceeded." | ||
|
|
260 | + *status*:: | ||
|
|
261 | + Two-letter status code: | ||
|
|
262 | + * *RE* -- run-time error, i.e., exited with a non-zero exit code | ||
|
|
263 | + * *SG* -- program died on a signal | ||
|
|
264 | + * *TO* -- timed out | ||
|
|
265 | + * *XX* -- internal error of the sandbox | ||
|
|
266 | + *time*:: | ||
|
|
267 | + Run time of the program in fractional seconds. | ||
|
|
268 | + *time-wall*:: | ||
|
|
269 | + Wall clock time of the program in fractional seconds. | ||
|
|
270 | + | ||
|
|
271 | + Please note that not all keys have to be present. | ||
|
|
272 | + For example, no *status* nor *message* is reported upon normal termination. | ||
|
|
273 | + | ||
|
|
274 | + RETURN VALUE | ||
|
|
275 | + ------------ | ||
|
|
276 | + When the program inside the sandbox finishes correctly, the sandbox returns 0. | ||
|
|
277 | + If it finishes incorrectly, it returns 1. | ||
|
|
278 | + All other return codes signal an internal error. | ||
|
|
279 | + | ||
|
|
280 | + INSTALLATION | ||
|
|
281 | + ------------ | ||
|
|
282 | + Isolate depends on several advanced features of the Linux kernel. Please | ||
|
|
283 | + make sure that your kernel supports | ||
|
|
284 | + PID namespaces (+CONFIG_PID_NS+), | ||
|
|
285 | + IPC namespaces (+CONFIG_IPC_NS+), and | ||
|
|
286 | + network namespaces (+CONFIG_NET_NS+). | ||
|
|
287 | + If you want to use control groups, you need | ||
|
|
288 | + the cpusets (+CONFIG_CPUSETS+), | ||
|
|
289 | + CPU accounting controller (+CONFIG_CGROUP_CPUACCT+), and | ||
|
|
290 | + memory resource controller (+CONFIG_MEMCG+). If your machine has swap enabled, | ||
|
|
291 | + you should also enable the swap controller (+CONFIG_MEMCG_SWAP+). | ||
|
|
292 | + | ||
|
|
293 | + Debian 7.x and newer require enabling the memory and swap cgroup controllers by | ||
|
|
294 | + adding the parameters "cgroup_enable=memory swapaccount=1" to the kernel | ||
|
|
295 | + command-line, which can be set using +GRUB_CMDLINE_LINUX_DEFAULT+ in | ||
|
|
296 | + /etc/default/grub. | ||
|
|
297 | + | ||
|
|
298 | + Isolate is designed to run setuid to root. The sub-process inside the sandbox | ||
|
|
299 | + then switches to a non-privileged user ID (different for each *--box-id*). | ||
|
|
300 | + The range of UIDs available and several filesystem paths are set in a configuration | ||
|
|
301 | + file, by default located in /usr/local/etc/isolate. | ||
|
|
302 | + | ||
|
|
303 | + Before you run isolate with control groups, you need to ensure that the cgroup | ||
|
|
304 | + filesystem is enabled and mounted. Most modern Linux distributions already | ||
|
|
305 | + provide cgroup support through a tmpfs mounted at /sys/fs/cgroup, with | ||
|
|
306 | + individual controllers mounted within subdirectories. | ||
|
|
307 | + | ||
|
|
308 | + REPRODUCIBILITY | ||
|
|
309 | + --------------- | ||
|
|
310 | + | ||
|
|
311 | + The reproducibility of results can be improved by tuning some kernel | ||
|
|
312 | + parameters, listed below. Some of these parameters can be checked using the | ||
|
|
313 | + program isolate-check-environment. | ||
|
|
314 | + | ||
|
|
315 | + * Disable address space randomization: +sysctl kernel.randomize_va_space=0+. | ||
|
|
316 | + Address space randomization can affect timing, memory usage, and program | ||
|
|
317 | + behavior. This setting can be made persistent through /etc/sysctl.d/. | ||
|
|
318 | + | ||
|
|
319 | + * Disable dynamic CPU frequency scaling. This requires setting the cpufreq | ||
|
|
320 | + scaling governor to +performance+. The process for doing this varies between | ||
|
|
321 | + distributions. | ||
|
|
322 | + | ||
|
|
323 | + * Consider disabling Turboboost on CPUs that might support it (most i3/i5/i7 | ||
|
|
324 | + Intel CPUs). Approach this one with caution. Disabling it on a CPU that Turboboosts | ||
|
|
325 | + from 2.3 GHz to 2.6 GHz would have minimal impact on run-times in exchange | ||
|
|
326 | + for determinism, but the same on a CPU that Turboboosts from 1.6 GHz to 2.8 | ||
|
|
327 | + GHz will incur a much more dramatic slowdown. Perhaps if the ambient | ||
|
|
328 | + temperature is controlled and only one single-threaded task is keeping the | ||
|
|
329 | + CPU busy at 100%, then TB's behaviour may be reasonably deterministic; | ||
|
|
330 | + requires further experimentation to confirm. | ||
|
|
331 | + | ||
|
|
332 | + * Run evaluations on a single CPU (core). The Linux scheduler has a tendency to randomly | ||
|
|
333 | + migrate tasks between CPUs, incurring cache migration costs. You can use isolate's | ||
|
|
334 | + configuration file to pin the process to a specified CPU. | ||
|
|
335 | + | ||
|
|
336 | + * Disable automatic kernel support for transparent huge pages. Both /sys/kernel/mm/transparent_hugepage/enabled | ||
|
|
337 | + and /sys/kernel/mm/transparent_hugepage/defrag should be set to "madvise" or "never", and | ||
|
|
338 | + /sys/kernel/mm/transparent_hugepage/khugepaged/defrag to 0. | ||
|
|
339 | + | ||
|
|
340 | + * Disable swapping. If you really need swap space and you are using cgroups, | ||
|
|
341 | + make sure that you have the memsw controller enabled, so that swap space is | ||
|
|
342 | + properly accounted for. | ||
|
|
343 | + | ||
|
|
344 | + LICENSE | ||
|
|
345 | + ------- | ||
|
|
346 | + Isolate was written by Martin Mares and Bernard Blackham. | ||
|
|
347 | + It can be distributed and used under the terms of the GNU | ||
|
|
348 | + General Public License version 2 or any later version. |
This diff has been collapsed as it changes many lines, (1122 lines changed) Show them Hide them | |||||
@@ -0,0 +1,1122 | |||||
|
|
1 | + /* | ||
|
|
2 | + * A Process Isolator based on Linux Containers | ||
|
|
3 | + * | ||
|
|
4 | + * (c) 2012-2018 Martin Mares <mj@ucw.cz> | ||
|
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | ||
|
|
6 | + */ | ||
|
|
7 | + | ||
|
|
8 | + #include "isolate.h" | ||
|
|
9 | + | ||
|
|
10 | + #include <errno.h> | ||
|
|
11 | + #include <fcntl.h> | ||
|
|
12 | + #include <getopt.h> | ||
|
|
13 | + #include <grp.h> | ||
|
|
14 | + #include <sched.h> | ||
|
|
15 | + #include <stdio.h> | ||
|
|
16 | + #include <stdlib.h> | ||
|
|
17 | + #include <string.h> | ||
|
|
18 | + #include <sys/mount.h> | ||
|
|
19 | + #include <sys/resource.h> | ||
|
|
20 | + #include <sys/signal.h> | ||
|
|
21 | + #include <sys/stat.h> | ||
|
|
22 | + #include <sys/time.h> | ||
|
|
23 | + #include <sys/vfs.h> | ||
|
|
24 | + #include <sys/wait.h> | ||
|
|
25 | + #include <time.h> | ||
|
|
26 | + #include <unistd.h> | ||
|
|
27 | + | ||
|
|
28 | + /* May not be defined in older glibc headers */ | ||
|
|
29 | + #ifndef MS_PRIVATE | ||
|
|
30 | + #warning "Working around old glibc: no MS_PRIVATE" | ||
|
|
31 | + #define MS_PRIVATE (1 << 18) | ||
|
|
32 | + #endif | ||
|
|
33 | + #ifndef MS_REC | ||
|
|
34 | + #warning "Working around old glibc: no MS_REC" | ||
|
|
35 | + #define MS_REC (1 << 14) | ||
|
|
36 | + #endif | ||
|
|
37 | + | ||
|
|
38 | + /* | ||
|
|
39 | + * Theory of operation | ||
|
|
40 | + * | ||
|
|
41 | + * Generally, we want to run a process inside a namespace/cgroup and watch it | ||
|
|
42 | + * from the outside. However, the reality is a little bit more complicated as we | ||
|
|
43 | + * do not want the inside process to become the init process of the PID namespace | ||
|
|
44 | + * (we want to have all signals properly delivered). | ||
|
|
45 | + * | ||
|
|
46 | + * We are running three processes: | ||
|
|
47 | + * | ||
|
|
48 | + * - Keeper process (root privileges, parent namespace, parent cgroups) | ||
|
|
49 | + * - Proxy process (UID/GID of the calling user, init process of the child | ||
|
|
50 | + * namespace, parent cgroups) | ||
|
|
51 | + * - Inside process (per-box UID/GID, child namespace, child cgroups) | ||
|
|
52 | + * | ||
|
|
53 | + * The proxy process just waits for the inside process to exit and then it passes | ||
|
|
54 | + * the exit status to the keeper. | ||
|
|
55 | + * | ||
|
|
56 | + * We use two pipes: | ||
|
|
57 | + * | ||
|
|
58 | + * - Error pipe for error messages produced by the proxy process and the early | ||
|
|
59 | + * stages of the inside process (until exec()). Listened to by the keeper. | ||
|
|
60 | + * - Status pipe for passing the PID of the inside process and its exit status | ||
|
|
61 | + * from the proxy to the keeper. | ||
|
|
62 | + */ | ||
|
|
63 | + | ||
|
|
64 | + #define TIMER_INTERVAL_US 100000 | ||
|
|
65 | + | ||
|
|
66 | + static int timeout; /* milliseconds */ | ||
|
|
67 | + static int wall_timeout; | ||
|
|
68 | + static int extra_timeout; | ||
|
|
69 | + int pass_environ; | ||
|
|
70 | + int verbose; | ||
|
|
71 | + static int silent; | ||
|
|
72 | + static int fsize_limit; | ||
|
|
73 | + static int memory_limit; | ||
|
|
74 | + static int stack_limit; | ||
|
|
75 | + int block_quota; | ||
|
|
76 | + int inode_quota; | ||
|
|
77 | + static int max_processes = 1; | ||
|
|
78 | + static char *redir_stdin, *redir_stdout, *redir_stderr; | ||
|
|
79 | + static int redir_stderr_to_stdout; | ||
|
|
80 | + static char *set_cwd; | ||
|
|
81 | + static int share_net; | ||
|
|
82 | + static int inherit_fds; | ||
|
|
83 | + static int default_dirs = 1; | ||
|
|
84 | + | ||
|
|
85 | + int cg_enable; | ||
|
|
86 | + int cg_memory_limit; | ||
|
|
87 | + int cg_timing = 1; | ||
|
|
88 | + | ||
|
|
89 | + int box_id; | ||
|
|
90 | + static char box_dir[1024]; | ||
|
|
91 | + static pid_t box_pid; | ||
|
|
92 | + static pid_t proxy_pid; | ||
|
|
93 | + | ||
|
|
94 | + uid_t box_uid; | ||
|
|
95 | + gid_t box_gid; | ||
|
|
96 | + uid_t orig_uid; | ||
|
|
97 | + gid_t orig_gid; | ||
|
|
98 | + | ||
|
|
99 | + static int partial_line; | ||
|
|
100 | + static int cleanup_ownership; | ||
|
|
101 | + | ||
|
|
102 | + static struct timeval start_time; | ||
|
|
103 | + static int ticks_per_sec; | ||
|
|
104 | + static int total_ms, wall_ms; | ||
|
|
105 | + static volatile sig_atomic_t timer_tick, interrupt; | ||
|
|
106 | + | ||
|
|
107 | + static int error_pipes[2]; | ||
|
|
108 | + static int write_errors_to_fd; | ||
|
|
109 | + static int read_errors_from_fd; | ||
|
|
110 | + | ||
|
|
111 | + static int status_pipes[2]; | ||
|
|
112 | + | ||
|
|
113 | + static int get_wall_time_ms(void); | ||
|
|
114 | + static int get_run_time_ms(struct rusage *rus); | ||
|
|
115 | + | ||
|
|
116 | + /*** Messages and exits ***/ | ||
|
|
117 | + | ||
|
|
118 | + static void | ||
|
|
119 | + final_stats(struct rusage *rus) | ||
|
|
120 | + { | ||
|
|
121 | + total_ms = get_run_time_ms(rus); | ||
|
|
122 | + wall_ms = get_wall_time_ms(); | ||
|
|
123 | + | ||
|
|
124 | + meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000); | ||
|
|
125 | + meta_printf("time-wall:%d.%03d\n", wall_ms/1000, wall_ms%1000); | ||
|
|
126 | + meta_printf("max-rss:%ld\n", rus->ru_maxrss); | ||
|
|
127 | + meta_printf("csw-voluntary:%ld\n", rus->ru_nvcsw); | ||
|
|
128 | + meta_printf("csw-forced:%ld\n", rus->ru_nivcsw); | ||
|
|
129 | + | ||
|
|
130 | + cg_stats(); | ||
|
|
131 | + } | ||
|
|
132 | + | ||
|
|
133 | + static void NONRET | ||
|
|
134 | + box_exit(int rc) | ||
|
|
135 | + { | ||
|
|
136 | + if (proxy_pid > 0) | ||
|
|
137 | + { | ||
|
|
138 | + if (box_pid > 0) | ||
|
|
139 | + { | ||
|
|
140 | + kill(-box_pid, SIGKILL); | ||
|
|
141 | + kill(box_pid, SIGKILL); | ||
|
|
142 | + } | ||
|
|
143 | + kill(-proxy_pid, SIGKILL); | ||
|
|
144 | + kill(proxy_pid, SIGKILL); | ||
|
|
145 | + meta_printf("killed:1\n"); | ||
|
|
146 | + | ||
|
|
147 | + struct rusage rus; | ||
|
|
148 | + int p, stat; | ||
|
|
149 | + do | ||
|
|
150 | + p = wait4(proxy_pid, &stat, 0, &rus); | ||
|
|
151 | + while (p < 0 && errno == EINTR); | ||
|
|
152 | + if (p < 0) | ||
|
|
153 | + fprintf(stderr, "UGH: Lost track of the process (%m)\n"); | ||
|
|
154 | + else | ||
|
|
155 | + final_stats(&rus); | ||
|
|
156 | + } | ||
|
|
157 | + | ||
|
|
158 | + if (rc < 2 && cleanup_ownership) | ||
|
|
159 | + chowntree("box", orig_uid, orig_gid); | ||
|
|
160 | + | ||
|
|
161 | + meta_close(); | ||
|
|
162 | + exit(rc); | ||
|
|
163 | + } | ||
|
|
164 | + | ||
|
|
165 | + static void | ||
|
|
166 | + flush_line(void) | ||
|
|
167 | + { | ||
|
|
168 | + if (partial_line) | ||
|
|
169 | + fputc('\n', stderr); | ||
|
|
170 | + partial_line = 0; | ||
|
|
171 | + } | ||
|
|
172 | + | ||
|
|
173 | + /* Report an error of the sandbox itself */ | ||
|
|
174 | + void NONRET __attribute__((format(printf,1,2))) | ||
|
|
175 | + die(char *msg, ...) | ||
|
|
176 | + { | ||
|
|
177 | + va_list args; | ||
|
|
178 | + va_start(args, msg); | ||
|
|
179 | + char buf[1024]; | ||
|
|
180 | + int n = vsnprintf(buf, sizeof(buf), msg, args); | ||
|
|
181 | + | ||
|
|
182 | + // If the child processes are still running, show no mercy. | ||
|
|
183 | + if (box_pid > 0) | ||
|
|
184 | + { | ||
|
|
185 | + kill(-box_pid, SIGKILL); | ||
|
|
186 | + kill(box_pid, SIGKILL); | ||
|
|
187 | + } | ||
|
|
188 | + if (proxy_pid > 0) | ||
|
|
189 | + { | ||
|
|
190 | + kill(-proxy_pid, SIGKILL); | ||
|
|
191 | + kill(proxy_pid, SIGKILL); | ||
|
|
192 | + } | ||
|
|
193 | + | ||
|
|
194 | + if (write_errors_to_fd) | ||
|
|
195 | + { | ||
|
|
196 | + // We are inside the box, have to use error pipe for error reporting. | ||
|
|
197 | + // We hope that the whole error message fits in PIPE_BUF bytes. | ||
|
|
198 | + write(write_errors_to_fd, buf, n); | ||
|
|
199 | + exit(2); | ||
|
|
200 | + } | ||
|
|
201 | + | ||
|
|
202 | + // Otherwise, we are in the box keeper process, so we report errors normally | ||
|
|
203 | + flush_line(); | ||
|
|
204 | + meta_printf("status:XX\nmessage:%s\n", buf); | ||
|
|
205 | + fputs(buf, stderr); | ||
|
|
206 | + fputc('\n', stderr); | ||
|
|
207 | + box_exit(2); | ||
|
|
208 | + } | ||
|
|
209 | + | ||
|
|
210 | + /* Report an error of the program inside the sandbox */ | ||
|
|
211 | + void NONRET __attribute__((format(printf,1,2))) | ||
|
|
212 | + err(char *msg, ...) | ||
|
|
213 | + { | ||
|
|
214 | + va_list args; | ||
|
|
215 | + va_start(args, msg); | ||
|
|
216 | + flush_line(); | ||
|
|
217 | + if (msg[0] && msg[1] && msg[2] == ':' && msg[3] == ' ') | ||
|
|
218 | + { | ||
|
|
219 | + meta_printf("status:%c%c\n", msg[0], msg[1]); | ||
|
|
220 | + msg += 4; | ||
|
|
221 | + } | ||
|
|
222 | + char buf[1024]; | ||
|
|
223 | + vsnprintf(buf, sizeof(buf), msg, args); | ||
|
|
224 | + meta_printf("message:%s\n", buf); | ||
|
|
225 | + if (!silent) | ||
|
|
226 | + { | ||
|
|
227 | + fputs(buf, stderr); | ||
|
|
228 | + fputc('\n', stderr); | ||
|
|
229 | + } | ||
|
|
230 | + box_exit(1); | ||
|
|
231 | + } | ||
|
|
232 | + | ||
|
|
233 | + /* Write a message, but only if in verbose mode */ | ||
|
|
234 | + void __attribute__((format(printf,1,2))) | ||
|
|
235 | + msg(char *msg, ...) | ||
|
|
236 | + { | ||
|
|
237 | + va_list args; | ||
|
|
238 | + va_start(args, msg); | ||
|
|
239 | + if (verbose) | ||
|
|
240 | + { | ||
|
|
241 | + int len = strlen(msg); | ||
|
|
242 | + if (len > 0) | ||
|
|
243 | + partial_line = (msg[len-1] != '\n'); | ||
|
|
244 | + vfprintf(stderr, msg, args); | ||
|
|
245 | + fflush(stderr); | ||
|
|
246 | + } | ||
|
|
247 | + va_end(args); | ||
|
|
248 | + } | ||
|
|
249 | + | ||
|
|
250 | + /*** Signal handling in keeper process ***/ | ||
|
|
251 | + | ||
|
|
252 | + /* | ||
|
|
253 | + * Signal handling is tricky. We must set up signal handlers before | ||
|
|
254 | + * we start the child process (and reset them in the child process). | ||
|
|
255 | + * Otherwise, there is a short time window where a SIGINT can kill | ||
|
|
256 | + * us and leave the child process running. | ||
|
|
257 | + */ | ||
|
|
258 | + | ||
|
|
259 | + struct signal_rule { | ||
|
|
260 | + int signum; | ||
|
|
261 | + enum { SIGNAL_IGNORE, SIGNAL_INTERRUPT, SIGNAL_FATAL } action; | ||
|
|
262 | + }; | ||
|
|
263 | + | ||
|
|
264 | + static const struct signal_rule signal_rules[] = { | ||
|
|
265 | + { SIGHUP, SIGNAL_INTERRUPT }, | ||
|
|
266 | + { SIGINT, SIGNAL_INTERRUPT }, | ||
|
|
267 | + { SIGQUIT, SIGNAL_INTERRUPT }, | ||
|
|
268 | + { SIGILL, SIGNAL_FATAL }, | ||
|
|
269 | + { SIGABRT, SIGNAL_FATAL }, | ||
|
|
270 | + { SIGFPE, SIGNAL_FATAL }, | ||
|
|
271 | + { SIGSEGV, SIGNAL_FATAL }, | ||
|
|
272 | + { SIGPIPE, SIGNAL_IGNORE }, | ||
|
|
273 | + { SIGTERM, SIGNAL_INTERRUPT }, | ||
|
|
274 | + { SIGUSR1, SIGNAL_IGNORE }, | ||
|
|
275 | + { SIGUSR2, SIGNAL_IGNORE }, | ||
|
|
276 | + { SIGBUS, SIGNAL_FATAL }, | ||
|
|
277 | + }; | ||
|
|
278 | + | ||
|
|
279 | + static void | ||
|
|
280 | + signal_alarm(int unused UNUSED) | ||
|
|
281 | + { | ||
|
|
282 | + /* Time limit checks are synchronous, so we only schedule them there. */ | ||
|
|
283 | + timer_tick = 1; | ||
|
|
284 | + msg("[timer]"); | ||
|
|
285 | + } | ||
|
|
286 | + | ||
|
|
287 | + static void | ||
|
|
288 | + signal_int(int signum) | ||
|
|
289 | + { | ||
|
|
290 | + /* Interrupts (e.g., SIGINT) are synchronous, too. */ | ||
|
|
291 | + interrupt = signum; | ||
|
|
292 | + } | ||
|
|
293 | + | ||
|
|
294 | + static void | ||
|
|
295 | + signal_fatal(int signum) | ||
|
|
296 | + { | ||
|
|
297 | + /* If we receive SIGSEGV or a similar signal, we try to die gracefully. */ | ||
|
|
298 | + die("Sandbox keeper received fatal signal %d", signum); | ||
|
|
299 | + } | ||
|
|
300 | + | ||
|
|
301 | + static void | ||
|
|
302 | + setup_signals(void) | ||
|
|
303 | + { | ||
|
|
304 | + struct sigaction sa_int, sa_fatal; | ||
|
|
305 | + bzero(&sa_int, sizeof(sa_int)); | ||
|
|
306 | + sa_int.sa_handler = signal_int; | ||
|
|
307 | + bzero(&sa_fatal, sizeof(sa_fatal)); | ||
|
|
308 | + sa_fatal.sa_handler = signal_fatal; | ||
|
|
309 | + | ||
|
|
310 | + for (int i=0; i < ARRAY_SIZE(signal_rules); i++) | ||
|
|
311 | + { | ||
|
|
312 | + const struct signal_rule *sr = &signal_rules[i]; | ||
|
|
313 | + switch (sr->action) | ||
|
|
314 | + { | ||
|
|
315 | + case SIGNAL_IGNORE: | ||
|
|
316 | + signal(sr->signum, SIG_IGN); | ||
|
|
317 | + break; | ||
|
|
318 | + case SIGNAL_INTERRUPT: | ||
|
|
319 | + sigaction(sr->signum, &sa_int, NULL); | ||
|
|
320 | + break; | ||
|
|
321 | + case SIGNAL_FATAL: | ||
|
|
322 | + sigaction(sr->signum, &sa_fatal, NULL); | ||
|
|
323 | + break; | ||
|
|
324 | + default: | ||
|
|
325 | + die("Invalid signal rule"); | ||
|
|
326 | + } | ||
|
|
327 | + } | ||
|
|
328 | + } | ||
|
|
329 | + | ||
|
|
330 | + static void | ||
|
|
331 | + reset_signals(void) | ||
|
|
332 | + { | ||
|
|
333 | + for (int i=0; i < ARRAY_SIZE(signal_rules); i++) | ||
|
|
334 | + signal(signal_rules[i].signum, SIG_DFL); | ||
|
|
335 | + } | ||
|
|
336 | + | ||
|
|
337 | + /*** The keeper process ***/ | ||
|
|
338 | + | ||
|
|
339 | + #define PROC_BUF_SIZE 4096 | ||
|
|
340 | + static int | ||
|
|
341 | + read_proc_file(char *buf, char *name, int *fdp) | ||
|
|
342 | + { | ||
|
|
343 | + int c; | ||
|
|
344 | + | ||
|
|
345 | + if (*fdp < 0) | ||
|
|
346 | + { | ||
|
|
347 | + snprintf(buf, PROC_BUF_SIZE, "/proc/%d/%s", (int) box_pid, name); | ||
|
|
348 | + *fdp = open(buf, O_RDONLY); | ||
|
|
349 | + if (*fdp < 0) | ||
|
|
350 | + return 0; // This is OK, the process could have finished | ||
|
|
351 | + } | ||
|
|
352 | + lseek(*fdp, 0, SEEK_SET); | ||
|
|
353 | + if ((c = read(*fdp, buf, PROC_BUF_SIZE-1)) < 0) | ||
|
|
354 | + { | ||
|
|
355 | + // Even this could fail if the process disappeared since open() | ||
|
|
356 | + return 0; | ||
|
|
357 | + } | ||
|
|
358 | + if (c >= PROC_BUF_SIZE-1) | ||
|
|
359 | + die("/proc/$pid/%s too long", name); | ||
|
|
360 | + buf[c] = 0; | ||
|
|
361 | + return 1; | ||
|
|
362 | + } | ||
|
|
363 | + | ||
|
|
364 | + static int | ||
|
|
365 | + get_wall_time_ms(void) | ||
|
|
366 | + { | ||
|
|
367 | + struct timeval now, wall; | ||
|
|
368 | + gettimeofday(&now, NULL); | ||
|
|
369 | + timersub(&now, &start_time, &wall); | ||
|
|
370 | + return wall.tv_sec*1000 + wall.tv_usec/1000; | ||
|
|
371 | + } | ||
|
|
372 | + | ||
|
|
373 | + static int | ||
|
|
374 | + get_run_time_ms(struct rusage *rus) | ||
|
|
375 | + { | ||
|
|
376 | + if (cg_enable && cg_timing) | ||
|
|
377 | + return cg_get_run_time_ms(); | ||
|
|
378 | + | ||
|
|
379 | + if (rus) | ||
|
|
380 | + { | ||
|
|
381 | + struct timeval total; | ||
|
|
382 | + timeradd(&rus->ru_utime, &rus->ru_stime, &total); | ||
|
|
383 | + return total.tv_sec*1000 + total.tv_usec/1000; | ||
|
|
384 | + } | ||
|
|
385 | + | ||
|
|
386 | + // It might happen that we do not know the box_pid (see comments in find_box_pid()) | ||
|
|
387 | + if (!box_pid) | ||
|
|
388 | + return 0; | ||
|
|
389 | + | ||
|
|
390 | + char buf[PROC_BUF_SIZE], *x; | ||
|
|
391 | + int utime, stime; | ||
|
|
392 | + static int proc_stat_fd = -1; | ||
|
|
393 | + | ||
|
|
394 | + if (!read_proc_file(buf, "stat", &proc_stat_fd)) | ||
|
|
395 | + return 0; | ||
|
|
396 | + x = buf; | ||
|
|
397 | + while (*x && *x != ' ') | ||
|
|
398 | + x++; | ||
|
|
399 | + while (*x == ' ') | ||
|
|
400 | + x++; | ||
|
|
401 | + if (*x++ != '(') | ||
|
|
402 | + die("proc stat syntax error 1"); | ||
|
|
403 | + while (*x && (*x != ')' || x[1] != ' ')) | ||
|
|
404 | + x++; | ||
|
|
405 | + while (*x == ')' || *x == ' ') | ||
|
|
406 | + x++; | ||
|
|
407 | + if (sscanf(x, "%*c %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %d %d", &utime, &stime) != 2) | ||
|
|
408 | + die("proc stat syntax error 2"); | ||
|
|
409 | + | ||
|
|
410 | + return (utime + stime) * 1000 / ticks_per_sec; | ||
|
|
411 | + } | ||
|
|
412 | + | ||
|
|
413 | + static void | ||
|
|
414 | + check_timeout(void) | ||
|
|
415 | + { | ||
|
|
416 | + if (wall_timeout) | ||
|
|
417 | + { | ||
|
|
418 | + int wall_ms = get_wall_time_ms(); | ||
|
|
419 | + if (wall_ms > wall_timeout) | ||
|
|
420 | + err("TO: Time limit exceeded (wall clock)"); | ||
|
|
421 | + if (verbose > 1) | ||
|
|
422 | + fprintf(stderr, "[wall time check: %d msec]\n", wall_ms); | ||
|
|
423 | + } | ||
|
|
424 | + if (timeout) | ||
|
|
425 | + { | ||
|
|
426 | + int ms = get_run_time_ms(NULL); | ||
|
|
427 | + if (verbose > 1) | ||
|
|
428 | + fprintf(stderr, "[time check: %d msec]\n", ms); | ||
|
|
429 | + if (ms > timeout && ms > extra_timeout) | ||
|
|
430 | + err("TO: Time limit exceeded"); | ||
|
|
431 | + } | ||
|
|
432 | + } | ||
|
|
433 | + | ||
|
|
/*
 * Main loop of the keeper, i.e. the process which stays outside the sandbox.
 *
 * It arms a periodic timer when a time limit is configured, waits for the
 * proxy process to exit, picks up an internal error message (error pipe) or
 * the exit status of the sandboxed program (status pipe), performs the final
 * limit checks and reports the verdict. Every path through the loop body
 * after a successful wait4() ends in a call to box_exit(), err() or die().
 */
static void
box_keeper(void)
{
  /* The keeper only reads from the pipes; close the write ends. */
  read_errors_from_fd = error_pipes[0];
  close(error_pipes[1]);
  close(status_pipes[1]);

  gettimeofday(&start_time, NULL);
  ticks_per_sec = sysconf(_SC_CLK_TCK);
  if (ticks_per_sec <= 0)
    die("Invalid ticks_per_sec!");

  if (timeout || wall_timeout)
    {
      /* Periodic SIGALRM; the handler sets timer_tick and interrupts wait4() below. */
      struct sigaction sa;
      bzero(&sa, sizeof(sa));
      sa.sa_handler = signal_alarm;
      sigaction(SIGALRM, &sa, NULL);
      struct itimerval timer = {
	.it_interval = { .tv_usec = TIMER_INTERVAL_US },
	.it_value = { .tv_usec = TIMER_INTERVAL_US },
      };
      setitimer(ITIMER_REAL, &timer, NULL);
    }

  for(;;)
    {
      struct rusage rus;
      int stat;
      pid_t p;
      /* Flags set by the signal handlers are processed here, synchronously. */
      if (interrupt)
	{
	  meta_printf("exitsig:%d\n", interrupt);
	  err("SG: Interrupted");
	}
      if (timer_tick)
	{
	  check_timeout();
	  timer_tick = 0;
	}
      p = wait4(proxy_pid, &stat, 0, &rus);
      if (p < 0)
	{
	  /* Interrupted by a signal: go back and look at the flags. */
	  if (errno == EINTR)
	    continue;
	  die("wait4: %m");
	}
      if (p != proxy_pid)
	die("wait4: unknown pid %d exited!", p);
      proxy_pid = 0;

      // Check error pipe if there is an internal error passed from inside the box
      // (run() creates this pipe as non-blocking, so an empty pipe does not stall us)
      char interr[1024];
      int n = read(read_errors_from_fd, interr, sizeof(interr) - 1);
      if (n > 0)
	{
	  interr[n] = 0;
	  die("%s", interr);
	}

      // Check status pipe if there is an exit status reported by the proxy process
      // (this overwrites the proxy's own status obtained from wait4() above)
      n = read(status_pipes[0], &stat, sizeof(stat));
      if (n != sizeof(stat))
	die("Did not receive exit status from proxy");

      /* Final limit checks use total_ms and wall_ms, which are read right after final_stats(). */
      final_stats(&rus);
      if (timeout && total_ms > timeout)
	err("TO: Time limit exceeded");
      if (wall_timeout && wall_ms > wall_timeout)
	err("TO: Time limit exceeded (wall clock)");

      if (WIFEXITED(stat))
	{
	  meta_printf("exitcode:%d\n", WEXITSTATUS(stat));
	  if (WEXITSTATUS(stat))
	    err("RE: Exited with error status %d", WEXITSTATUS(stat));
	  flush_line();
	  if (!silent)
	    {
	      fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall)\n",
		total_ms/1000, total_ms%1000,
		wall_ms/1000, wall_ms%1000);
	    }
	  box_exit(0);
	}
      else if (WIFSIGNALED(stat))
	{
	  meta_printf("exitsig:%d\n", WTERMSIG(stat));
	  err("SG: Caught fatal signal %d", WTERMSIG(stat));
	}
      else if (WIFSTOPPED(stat))
	{
	  meta_printf("exitsig:%d\n", WSTOPSIG(stat));
	  err("SG: Stopped by signal %d", WSTOPSIG(stat));
	}
      else
	die("wait4: unknown status %x, giving up!", stat);
    }
}
|
|
533 | + | ||
|
|
534 | + /*** The process running inside the box ***/ | ||
|
|
535 | + | ||
|
|
/*
 * Build the root file system of the sandbox and enter it.
 *
 * All paths are relative to the current directory: a "root" directory is
 * created there, a tmpfs is mounted over it, the directory rules are applied
 * and the process finally chroots into it.
 */
static void
setup_root(void)
{
  /* The mount point may be left over from a previous run. */
  if (mkdir("root", 0750) < 0 && errno != EEXIST)
    die("mkdir('root'): %m");

  /*
   * Ensure all mounts are private, not shared. We don't want our mounts
   * appearing outside of our namespace.
   * (systemd since version 188 mounts filesystems shared by default).
   */
  if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
    die("Cannot privatize mounts: %m");

  if (mount("none", "root", "tmpfs", 0, "mode=755") < 0)
    die("Cannot mount root ramdisk: %m");

  apply_dir_rules(default_dirs);

  if (chroot("root") < 0)
    die("Chroot failed: %m");

  /*
   * chroot() does not change the working directory, so this relative path is
   * still resolved from the directory we started in.
   */
  if (chdir("root/box") < 0)
    die("Cannot change current directory: %m");
}
|
|
561 | + | ||
|
|
/*
 * Switch the process to the sandbox identity (box_uid / box_gid).
 *
 * The order matters: the group IDs and the supplementary group list are
 * changed while the process is still privileged; the user IDs go last.
 */
static void
setup_credentials(void)
{
  if (setresgid(box_gid, box_gid, box_gid) < 0)
    die("setresgid: %m");
  /* Drop all supplementary groups */
  if (setgroups(0, NULL) < 0)
    die("setgroups: %m");
  if (setresuid(box_uid, box_uid, box_uid) < 0)
    die("setresuid: %m");
  /* Put the program into a process group of its own */
  setpgrp();
}
|
|
573 | + | ||
|
|
574 | + static void | ||
|
|
575 | + setup_fds(void) | ||
|
|
576 | + { | ||
|
|
577 | + if (redir_stdin) | ||
|
|
578 | + { | ||
|
|
579 | + close(0); | ||
|
|
580 | + if (open(redir_stdin, O_RDONLY) != 0) | ||
|
|
581 | + die("open(\"%s\"): %m", redir_stdin); | ||
|
|
582 | + } | ||
|
|
583 | + if (redir_stdout) | ||
|
|
584 | + { | ||
|
|
585 | + close(1); | ||
|
|
586 | + if (open(redir_stdout, O_WRONLY | O_CREAT | O_TRUNC, 0666) != 1) | ||
|
|
587 | + die("open(\"%s\"): %m", redir_stdout); | ||
|
|
588 | + } | ||
|
|
589 | + if (redir_stderr) | ||
|
|
590 | + { | ||
|
|
591 | + close(2); | ||
|
|
592 | + if (open(redir_stderr, O_WRONLY | O_CREAT | O_TRUNC, 0666) != 2) | ||
|
|
593 | + die("open(\"%s\"): %m", redir_stderr); | ||
|
|
594 | + } | ||
|
|
595 | + if (redir_stderr_to_stdout) | ||
|
|
596 | + { | ||
|
|
597 | + if (dup2(1, 2) < 0) | ||
|
|
598 | + die("Cannot dup stdout to stderr: %m"); | ||
|
|
599 | + } | ||
|
|
600 | + } | ||
|
|
601 | + | ||
|
|
/*
 * Set both the soft and the hard limit of resource res to limit.
 * res_name is used only for the error message. Failure is fatal and the
 * message now carries the system error (%m), like the other syscall
 * failures reported in this file.
 */
static void
setup_rlim(const char *res_name, int res, rlim_t limit)
{
  struct rlimit rl = { .rlim_cur = limit, .rlim_max = limit };
  if (setrlimit(res, &rl) < 0)
    die("setrlimit(%s, %jd): %m", res_name, (intmax_t) limit);
}
|
|
609 | + | ||
|
|
610 | + static void | ||
|
|
611 | + setup_rlimits(void) | ||
|
|
612 | + { | ||
|
|
613 | + #define RLIM(res, val) setup_rlim("RLIMIT_" #res, RLIMIT_##res, val) | ||
|
|
614 | + | ||
|
|
615 | + if (memory_limit) | ||
|
|
616 | + RLIM(AS, (rlim_t)memory_limit * 1024); | ||
|
|
617 | + | ||
|
|
618 | + if (fsize_limit) | ||
|
|
619 | + RLIM(FSIZE, (rlim_t)fsize_limit * 1024); | ||
|
|
620 | + | ||
|
|
621 | + RLIM(STACK, (stack_limit ? (rlim_t)stack_limit * 1024 : RLIM_INFINITY)); | ||
|
|
622 | + RLIM(NOFILE, 64); | ||
|
|
623 | + RLIM(MEMLOCK, 0); | ||
|
|
624 | + | ||
|
|
625 | + if (max_processes) | ||
|
|
626 | + RLIM(NPROC, max_processes); | ||
|
|
627 | + | ||
|
|
628 | + #undef RLIM | ||
|
|
629 | + } | ||
|
|
630 | + | ||
|
|
/*
 * Body of the process which becomes the sandboxed program.
 *
 * The steps are order-sensitive: the root file system and the resource
 * limits are set up before setup_credentials() switches to the box UID/GID,
 * and the redirections, the environment and the working directory follow.
 * On success execve() does not return; on failure die() does not return
 * either, so the function never reaches its end.
 */
static int
box_inside(char **args)
{
  cg_enter();
  setup_root();
  setup_rlimits();
  setup_credentials();
  setup_fds();
  char **env = setup_environment();

  /* Optional working directory requested by --chdir */
  if (set_cwd && chdir(set_cwd))
    die("chdir: %m");

  execve(args[0], args, env);
  die("execve(\"%s\"): %m", args[0]);
}
|
|
647 | + | ||
|
|
648 | + /*** Proxy ***/ | ||
|
|
649 | + | ||
|
|
/*
 * Switch the proxy process to orig_uid / orig_gid, which main() obtains from
 * getuid() / getgid(), and drop all supplementary groups.
 * As in setup_credentials(), the user IDs are changed last.
 */
static void
setup_orig_credentials(void)
{
  if (setresgid(orig_gid, orig_gid, orig_gid) < 0)
    die("setresgid: %m");
  if (setgroups(0, NULL) < 0)
    die("setgroups: %m");
  if (setresuid(orig_uid, orig_uid, orig_uid) < 0)
    die("setresuid: %m");
}
|
|
660 | + | ||
|
|
/*
 * Entry point of the proxy process created by clone() in run().
 *
 * The proxy forks the process which becomes the sandboxed program, drops its
 * own privileges and then writes two records to the status pipe: first the
 * PID of the child as returned by fork(), later the wait status of the child.
 * Internal errors are redirected to the error pipe.
 *
 * arg is the argument vector of the program to run.
 */
static int
box_proxy(void *arg)
{
  char **args = arg;

  /* From now on, errors are reported through the pipe read by the keeper. */
  write_errors_to_fd = error_pipes[1];
  close(error_pipes[0]);
  close(status_pipes[0]);
  meta_close();
  reset_signals();

  pid_t inside_pid = fork();
  if (inside_pid < 0)
    die("Cannot run process, fork failed: %m");
  else if (!inside_pid)
    {
      /* Child: it has no business with the status pipe. */
      close(status_pipes[1]);
      box_inside(args);
      _exit(42);	// We should never get here
    }

  setup_orig_credentials();
  /* First record: the PID of the child; run() waits for it before it continues. */
  if (write(status_pipes[1], &inside_pid, sizeof(inside_pid)) != sizeof(inside_pid))
    die("Proxy write to pipe failed: %m");

  int stat;
  pid_t p = waitpid(inside_pid, &stat, 0);
  if (p < 0)
    die("Proxy waitpid() failed: %m");

  /* Second record: the wait status, picked up by box_keeper(). */
  if (write(status_pipes[1], &stat, sizeof(stat)) != sizeof(stat))
    die("Proxy write to pipe failed: %m");

  _exit(0);
}
|
|
696 | + | ||
|
|
697 | + static void | ||
|
|
698 | + box_init(void) | ||
|
|
699 | + { | ||
|
|
700 | + if (box_id < 0 || box_id >= cf_num_boxes) | ||
|
|
701 | + die("Sandbox ID out of range (allowed: 0-%d)", cf_num_boxes-1); | ||
|
|
702 | + box_uid = cf_first_uid + box_id; | ||
|
|
703 | + box_gid = cf_first_gid + box_id; | ||
|
|
704 | + | ||
|
|
705 | + snprintf(box_dir, sizeof(box_dir), "%s/%d", cf_box_root, box_id); | ||
|
|
706 | + make_dir(box_dir); | ||
|
|
707 | + if (chdir(box_dir) < 0) | ||
|
|
708 | + die("chdir(%s): %m", box_dir); | ||
|
|
709 | + } | ||
|
|
710 | + | ||
|
|
711 | + /*** Commands ***/ | ||
|
|
712 | + | ||
|
|
713 | + static const char * | ||
|
|
714 | + self_name(void) | ||
|
|
715 | + { | ||
|
|
716 | + return cg_enable ? "isolate --cg" : "isolate"; | ||
|
|
717 | + } | ||
|
|
718 | + | ||
|
|
719 | + static void | ||
|
|
720 | + init(void) | ||
|
|
721 | + { | ||
|
|
722 | + msg("Preparing sandbox directory\n"); | ||
|
|
723 | + if (mkdir("box", 0700) < 0) | ||
|
|
724 | + { | ||
|
|
725 | + if (errno == EEXIST) | ||
|
|
726 | + die("Box already exists, run `%s --cleanup' first", self_name()); | ||
|
|
727 | + else | ||
|
|
728 | + die("Cannot create box: %m"); | ||
|
|
729 | + } | ||
|
|
730 | + if (chown("box", orig_uid, orig_gid) < 0) | ||
|
|
731 | + die("Cannot chown box: %m"); | ||
|
|
732 | + | ||
|
|
733 | + cg_prepare(); | ||
|
|
734 | + set_quota(); | ||
|
|
735 | + | ||
|
|
736 | + puts(box_dir); | ||
|
|
737 | + } | ||
|
|
738 | + | ||
|
|
739 | + static void | ||
|
|
740 | + cleanup(void) | ||
|
|
741 | + { | ||
|
|
742 | + if (!dir_exists("box")) | ||
|
|
743 | + { | ||
|
|
744 | + msg("Nothing to do -- box directory did not exist\n"); | ||
|
|
745 | + return; | ||
|
|
746 | + } | ||
|
|
747 | + | ||
|
|
748 | + msg("Deleting sandbox directory\n"); | ||
|
|
749 | + rmtree(box_dir); | ||
|
|
750 | + cg_remove(); | ||
|
|
751 | + } | ||
|
|
752 | + | ||
|
|
/*
 * Create a pipe in fds[0..1] with both ends marked close-on-exec.
 * When nonblocking is non-zero, both ends are also switched to O_NONBLOCK.
 * Any failure is fatal.
 */
static void
setup_pipe(int *fds, int nonblocking)
{
  if (pipe(fds) < 0)
    die("pipe: %m");

  for (int end = 0; end < 2; end++)
    {
      int fd = fds[end];

      int fd_flags = fcntl(fd, F_GETFD);
      if (fcntl(fd, F_SETFD, fd_flags | FD_CLOEXEC) < 0)
	die("fcntl on pipe: %m");

      if (!nonblocking)
	continue;

      int status_flags = fcntl(fd, F_GETFL);
      if (fcntl(fd, F_SETFL, status_flags | O_NONBLOCK) < 0)
	die("fcntl on pipe: %m");
    }
}
|
|
763 | + | ||
|
|
764 | + static void | ||
|
|
765 | + find_box_pid(void) | ||
|
|
766 | + { | ||
|
|
767 | + /* | ||
|
|
768 | + * The box keeper process wants to poll status of the inside process, | ||
|
|
769 | + * so it needs to know the box_pid. However, it is not easy to obtain: | ||
|
|
770 | + * we got the PID from the proxy, but it is local to the PID namespace. | ||
|
|
771 | + * Instead, we ask /proc to enumerate the children of the proxy. | ||
|
|
772 | + * | ||
|
|
773 | + * CAVEAT: The timing is tricky. We know that the inside process was | ||
|
|
774 | + * already started (passing the PID from the proxy to us guarantees it), | ||
|
|
775 | + * but it might already have exited and be reaped by the proxy. Therefore | ||
|
|
776 | + * it is correct if we fail to find anything. | ||
|
|
777 | + */ | ||
|
|
778 | + | ||
|
|
779 | + char namebuf[256]; | ||
|
|
780 | + snprintf(namebuf, sizeof(namebuf), "/proc/%d/task/%d/children", (int) proxy_pid, (int) proxy_pid); | ||
|
|
781 | + FILE *f = fopen(namebuf, "r"); | ||
|
|
782 | + if (!f) | ||
|
|
783 | + return; | ||
|
|
784 | + | ||
|
|
785 | + int child; | ||
|
|
786 | + if (fscanf(f, "%d", &child) != 1) | ||
|
|
787 | + { | ||
|
|
788 | + fclose(f); | ||
|
|
789 | + return; | ||
|
|
790 | + } | ||
|
|
791 | + box_pid = child; | ||
|
|
792 | + | ||
|
|
793 | + if (fscanf(f, "%d", &child) == 1) | ||
|
|
794 | + die("Error parsing %s: unexpected children found", namebuf); | ||
|
|
795 | + | ||
|
|
796 | + fclose(f); | ||
|
|
797 | + } | ||
|
|
798 | + | ||
|
|
/*
 * The --run command: start argv inside the sandbox and watch it.
 *
 * The box directory is given to the box user, the error and status pipes and
 * the signal handlers are set up, and the proxy process is created in new
 * IPC, mount and PID namespaces (plus a new network namespace unless
 * --share-net is used). After the proxy has announced the PID of the inside
 * process, control passes to box_keeper().
 */
static void
run(char **argv)
{
  if (!dir_exists("box"))
    die("Box directory not found, did you run `%s --init'?", self_name());

  if (!inherit_fds)
    close_all_fds();

  chowntree("box", box_uid, box_gid);
  cleanup_ownership = 1;

  /* Only the error pipe is non-blocking: the keeper reads it without knowing whether it holds anything. */
  setup_pipe(error_pipes, 1);
  setup_pipe(status_pipes, 0);
  setup_signals();

  /*
   * NOTE(review): argv is passed both as the child's stack pointer and as the
   * argument of box_proxy(). CLONE_VM is not used here, so the child appears
   * to get its own copy of the address space -- confirm that reusing this
   * address as a stack is intended.
   */
  proxy_pid = clone(
    box_proxy,			// Function to execute as the body of the new process
    argv,			// Pass our stack
    SIGCHLD | CLONE_NEWIPC | (share_net ? 0 : CLONE_NEWNET) | CLONE_NEWNS | CLONE_NEWPID,
    argv);			// Pass the arguments
  if (proxy_pid < 0)
    die("Cannot run proxy, clone failed: %m");
  if (!proxy_pid)
    die("Cannot run proxy, clone returned 0");

  /* Wait for the first record from the proxy: the PID of the inside process in its namespace. */
  pid_t box_pid_inside_ns;
  int n = read(status_pipes[0], &box_pid_inside_ns, sizeof(box_pid_inside_ns));
  if (n != sizeof(box_pid_inside_ns))
    die("Proxy failed before it passed box_pid: %m");
  find_box_pid();
  msg("Started proxy_pid=%d box_pid=%d box_pid_inside_ns=%d\n", (int) proxy_pid, (int) box_pid, (int) box_pid_inside_ns);

  box_keeper();
}
|
|
834 | + | ||
|
|
835 | + static void | ||
|
|
836 | + show_version(void) | ||
|
|
837 | + { | ||
|
|
838 | + printf("The process isolator " VERSION "\n"); | ||
|
|
839 | + printf("(c) 2012--" YEAR " Martin Mares and Bernard Blackham\n"); | ||
|
|
840 | + printf("Built on " BUILD_DATE " from Git commit " BUILD_COMMIT "\n"); | ||
|
|
841 | + } | ||
|
|
842 | + | ||
|
|
843 | + /*** Options ***/ | ||
|
|
844 | + | ||
|
|
/*
 * Print an optional error message (printf-style, to stderr) followed by the
 * usage summary (to stdout) and exit with status 2. Pass NULL to print the
 * summary only.
 *
 * The help text was corrected in two places: --env=<var>=<val> unsets the
 * variable when <val> (not <var>) is empty, and "Time limits affects" now
 * reads "Time limits affect".
 */
static void __attribute__((format(printf,1,2)))
usage(const char *msg, ...)
{
  if (msg != NULL)
    {
      va_list args;
      va_start(args, msg);
      vfprintf(stderr, msg, args);
      va_end(args);
    }
  printf("\
Usage: isolate [<options>] <command>\n\
\n\
Options:\n\
-b, --box-id=<id>\tWhen multiple sandboxes are used in parallel, each must get a unique ID\n\
--cg\t\tEnable use of control groups\n\
--cg-mem=<size>\tLimit memory usage of the control group to <size> KB\n\
--cg-timing\t\tTime limits affect total run time of the control group\n\
\t\t\t(this is turned on by default, use --no-cg-timing to turn off)\n\
-c, --chdir=<dir>\tChange directory to <dir> before executing the program\n\
-d, --dir=<dir>\t\tMake a directory <dir> visible inside the sandbox\n\
--dir=<in>=<out>\tMake a directory <out> outside visible as <in> inside\n\
--dir=<in>=\t\tDelete a previously defined directory rule (even a default one)\n\
--dir=...:<opt>\tSpecify options for a rule:\n\
\t\t\t\tdev\tAllow access to special files\n\
\t\t\t\tfs\tMount a filesystem (e.g., --dir=/proc:proc:fs)\n\
\t\t\t\tmaybe\tSkip the rule if <out> does not exist\n\
\t\t\t\tnoexec\tDo not allow execution of binaries\n\
\t\t\t\trw\tAllow read-write access\n\
-D, --no-default-dirs\tDo not add default directory rules\n\
-f, --fsize=<size>\tMax size (in KB) of files that can be created\n\
-E, --env=<var>\t\tInherit the environment variable <var> from the parent process\n\
-E, --env=<var>=<val>\tSet the environment variable <var> to <val>; unset it if <val> is empty\n\
-x, --extra-time=<time>\tSet extra timeout, before which a timing-out program is not yet killed,\n\
\t\t\tso that its real execution time is reported (seconds, fractions allowed)\n\
-e, --full-env\t\tInherit full environment of the parent process\n\
--inherit-fds\t\tInherit all file descriptors of the parent process\n\
-m, --mem=<size>\tLimit address space to <size> KB\n\
-M, --meta=<file>\tOutput process information to <file> (name:value)\n\
-q, --quota=<blk>,<ino>\tSet disk quota to <blk> blocks and <ino> inodes\n\
--share-net\t\tShare network namespace with the parent process\n\
-s, --silent\t\tDo not print status messages except for fatal errors\n\
-k, --stack=<size>\tLimit stack size to <size> KB (default: 0=unlimited)\n\
-r, --stderr=<file>\tRedirect stderr to <file>\n\
--stderr-to-stdout\tRedirect stderr to stdout\n\
-i, --stdin=<file>\tRedirect stdin from <file>\n\
-o, --stdout=<file>\tRedirect stdout to <file>\n\
-p, --processes[=<max>]\tEnable multiple processes (at most <max> of them); needs --cg\n\
-t, --time=<time>\tSet run time limit (seconds, fractions allowed)\n\
-v, --verbose\t\tBe verbose (use multiple times for even more verbosity)\n\
-w, --wall-time=<time>\tSet wall clock time limit (seconds, fractions allowed)\n\
\n\
Commands:\n\
--init\t\tInitialize sandbox (and its control group when --cg is used)\n\
--run -- <cmd> ...\tRun given command within sandbox\n\
--cleanup\t\tClean up sandbox\n\
--version\t\tDisplay program version and configuration\n\
");
  exit(2);
}
|
|
905 | + | ||
|
|
/*
 * Codes returned by getopt_long() for options which have no single-letter
 * form. They start at 256, so they cannot collide with the character codes
 * of the short options handled by the same switch in main().
 * The first four also serve as the selected command (see `mode' in main()).
 */
enum opt_code {
  OPT_INIT = 256,		/* --init */
  OPT_RUN,			/* --run */
  OPT_CLEANUP,			/* --cleanup */
  OPT_VERSION,			/* --version */
  OPT_CG,			/* --cg */
  OPT_CG_MEM,			/* --cg-mem */
  OPT_CG_TIMING,		/* --cg-timing */
  OPT_NO_CG_TIMING,		/* --no-cg-timing */
  OPT_SHARE_NET,		/* --share-net */
  OPT_INHERIT_FDS,		/* --inherit-fds */
  OPT_STDERR_TO_STDOUT,		/* --stderr-to-stdout */
};
|
|
919 | + | ||
|
|
920 | + static const char short_opts[] = "b:c:d:DeE:f:i:k:m:M:o:p::q:r:st:vw:x:"; | ||
|
|
921 | + | ||
|
|
922 | + static const struct option long_opts[] = { | ||
|
|
923 | + { "box-id", 1, NULL, 'b' }, | ||
|
|
924 | + { "chdir", 1, NULL, 'c' }, | ||
|
|
925 | + { "cg", 0, NULL, OPT_CG }, | ||
|
|
926 | + { "cg-mem", 1, NULL, OPT_CG_MEM }, | ||
|
|
927 | + { "cg-timing", 0, NULL, OPT_CG_TIMING }, | ||
|
|
928 | + { "cleanup", 0, NULL, OPT_CLEANUP }, | ||
|
|
929 | + { "dir", 1, NULL, 'd' }, | ||
|
|
930 | + { "no-cg-timing", 0, NULL, OPT_NO_CG_TIMING }, | ||
|
|
931 | + { "no-default-dirs", 0, NULL, 'D' }, | ||
|
|
932 | + { "fsize", 1, NULL, 'f' }, | ||
|
|
933 | + { "env", 1, NULL, 'E' }, | ||
|
|
934 | + { "extra-time", 1, NULL, 'x' }, | ||
|
|
935 | + { "full-env", 0, NULL, 'e' }, | ||
|
|
936 | + { "inherit-fds", 0, NULL, OPT_INHERIT_FDS }, | ||
|
|
937 | + { "init", 0, NULL, OPT_INIT }, | ||
|
|
938 | + { "mem", 1, NULL, 'm' }, | ||
|
|
939 | + { "meta", 1, NULL, 'M' }, | ||
|
|
940 | + { "processes", 2, NULL, 'p' }, | ||
|
|
941 | + { "quota", 1, NULL, 'q' }, | ||
|
|
942 | + { "run", 0, NULL, OPT_RUN }, | ||
|
|
943 | + { "share-net", 0, NULL, OPT_SHARE_NET }, | ||
|
|
944 | + { "silent", 0, NULL, 's' }, | ||
|
|
945 | + { "stack", 1, NULL, 'k' }, | ||
|
|
946 | + { "stderr", 1, NULL, 'r' }, | ||
|
|
947 | + { "stderr-to-stdout", 0, NULL, OPT_STDERR_TO_STDOUT }, | ||
|
|
948 | + { "stdin", 1, NULL, 'i' }, | ||
|
|
949 | + { "stdout", 1, NULL, 'o' }, | ||
|
|
950 | + { "time", 1, NULL, 't' }, | ||
|
|
951 | + { "verbose", 0, NULL, 'v' }, | ||
|
|
952 | + { "version", 0, NULL, OPT_VERSION }, | ||
|
|
953 | + { "wall-time", 1, NULL, 'w' }, | ||
|
|
954 | + { NULL, 0, NULL, 0 } | ||
|
|
955 | + }; | ||
|
|
956 | + | ||
|
|
/*
 * Program entry point: parse the options, check that exactly one command was
 * given, make sure we run with root privileges, load the configuration and
 * dispatch to init(), run() or cleanup().
 *
 * Exit status: 0 on success, 2 when usage() is invoked; the failure paths go
 * through die(), which does not return.
 */
int
main(int argc, char **argv)
{
  int c;
  int require_cg = 0;		/* Set by options which make sense only together with --cg */
  char *sep;
  enum opt_code mode = 0;	/* The selected command; 0 = none seen yet */

  init_dir_rules();

  /*
   * NOTE(review): numeric arguments are converted by atoi()/atof() without
   * any validation, so malformed numbers are accepted silently.
   */
  while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) >= 0)
    switch (c)
      {
      case 'b':
	box_id = atoi(optarg);
	break;
      case 'c':
	set_cwd = optarg;
	break;
      case OPT_CG:
	cg_enable = 1;
	break;
      case 'd':
	if (!set_dir_action(optarg))
	  usage("Invalid directory specified: %s\n", optarg);
	break;
      case 'D':
	default_dirs = 0;
	break;
      case 'e':
	pass_environ = 1;
	break;
      case 'E':
	if (!set_env_action(optarg))
	  usage("Invalid environment specified: %s\n", optarg);
	break;
      case 'f':
	fsize_limit = atoi(optarg);
	break;
      case 'k':
	stack_limit = atoi(optarg);
	break;
      case 'i':
	redir_stdin = optarg;
	break;
      case 'm':
	memory_limit = atoi(optarg);
	break;
      case 'M':
	meta_open(optarg);
	break;
      case 'o':
	redir_stdout = optarg;
	break;
      case 'p':
	/* The argument is optional; without it, max_processes becomes 0 */
	if (optarg)
	  max_processes = atoi(optarg);
	else
	  max_processes = 0;
	break;
      case 'q':
	/* <blocks>,<inodes>; usage() exits, so sep is non-NULL below */
	sep = strchr(optarg, ',');
	if (!sep)
	  usage("Invalid quota specified: %s\n", optarg);
	block_quota = atoi(optarg);
	inode_quota = atoi(sep+1);
	break;
      case 'r':
	/* --stderr and --stderr-to-stdout override each other; the last one wins */
	redir_stderr = optarg;
	redir_stderr_to_stdout = 0;
	break;
      case 's':
	silent++;
	break;
      case 't':
	/* Time limits are given in seconds and stored in milliseconds */
	timeout = 1000*atof(optarg);
	break;
      case 'v':
	verbose++;
	break;
      case 'w':
	wall_timeout = 1000*atof(optarg);
	break;
      case 'x':
	extra_timeout = 1000*atof(optarg);
	break;
      case OPT_INIT:
      case OPT_RUN:
      case OPT_CLEANUP:
      case OPT_VERSION:
	/* Repeating the same command is tolerated, mixing different ones is not */
	if (!mode || (int) mode == c)
	  mode = c;
	else
	  usage("Only one command is allowed.\n");
	break;
      case OPT_CG_MEM:
	cg_memory_limit = atoi(optarg);
	require_cg = 1;
	break;
      case OPT_CG_TIMING:
	cg_timing = 1;
	require_cg = 1;
	break;
      case OPT_NO_CG_TIMING:
	cg_timing = 0;
	require_cg = 1;
	break;
      case OPT_SHARE_NET:
	share_net = 1;
	break;
      case OPT_INHERIT_FDS:
	inherit_fds = 1;
	break;
      case OPT_STDERR_TO_STDOUT:
	redir_stderr = NULL;
	redir_stderr_to_stdout = 1;
	break;
      default:
	usage(NULL);
      }

  if (!mode)
    usage("Please specify an isolate command (e.g. --init, --run).\n");
  /* --version needs neither privileges nor the configuration */
  if (mode == OPT_VERSION)
    {
      show_version();
      return 0;
    }

  if (require_cg && !cg_enable)
    usage("Options related to control groups require --cg to be set.\n");

  /* An effective UID of root is required; the real IDs identify the invoking user */
  if (geteuid())
    die("Must be started as root");
  if (getegid() && setegid(0) < 0)
    die("Cannot switch to root group: %m");
  orig_uid = getuid();
  orig_gid = getgid();

  umask(022);
  cf_parse();
  box_init();
  cg_init();

  switch (mode)
    {
    case OPT_INIT:
      if (optind < argc)
	usage("--init mode takes no parameters\n");
      init();
      break;
    case OPT_RUN:
      /* Everything after the options is the command to run */
      if (optind >= argc)
	usage("--run mode requires a command to run\n");
      run(argv+optind);
      break;
    case OPT_CLEANUP:
      if (optind < argc)
	usage("--cleanup mode takes no parameters\n");
      cleanup();
      break;
    default:
      die("Internal error: mode mismatch");
    }
  exit(0);
}
@@ -0,0 +1,86 | |||||
|
|
1 | + /* | ||
|
|
2 | + * Process Isolator | ||
|
|
3 | + * | ||
|
|
4 | + * (c) 2012-2017 Martin Mares <mj@ucw.cz> | ||
|
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | ||
|
|
6 | + */ | ||
|
|
7 | + | ||
|
|
8 | + #include <stdarg.h> | ||
|
|
9 | + #include <stdint.h> | ||
|
|
10 | + #include <sys/types.h> | ||
|
|
11 | + | ||
|
|
12 | + #define NONRET __attribute__((noreturn)) | ||
|
|
13 | + #define UNUSED __attribute__((unused)) | ||
|
|
14 | + #define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) | ||
|
|
15 | + | ||
|
|
16 | + /* isolate.c */ | ||
|
|
17 | + | ||
|
|
18 | + void die(char *msg, ...) NONRET; | ||
|
|
19 | + void NONRET __attribute__((format(printf,1,2))) err(char *msg, ...); | ||
|
|
20 | + void __attribute__((format(printf,1,2))) msg(char *msg, ...); | ||
|
|
21 | + | ||
|
|
22 | + extern int pass_environ; | ||
|
|
23 | + extern int verbose; | ||
|
|
24 | + extern int block_quota; | ||
|
|
25 | + extern int inode_quota; | ||
|
|
26 | + extern int cg_enable; | ||
|
|
27 | + extern int cg_memory_limit; | ||
|
|
28 | + extern int cg_timing; | ||
|
|
29 | + | ||
|
|
30 | + extern int box_id; | ||
|
|
31 | + extern uid_t box_uid, orig_uid; | ||
|
|
32 | + extern gid_t box_gid, orig_gid; | ||
|
|
33 | + | ||
|
|
34 | + /* util.c */ | ||
|
|
35 | + | ||
|
|
36 | + void *xmalloc(size_t size); | ||
|
|
37 | + char *xstrdup(char *str); | ||
|
|
38 | + int dir_exists(char *path); | ||
|
|
39 | + void rmtree(char *path); | ||
|
|
40 | + void make_dir(char *path); | ||
|
|
41 | + void chowntree(char *path, uid_t uid, gid_t gid); | ||
|
|
42 | + void close_all_fds(void); | ||
|
|
43 | + | ||
|
|
44 | + void meta_open(const char *name); | ||
|
|
45 | + void meta_close(void); | ||
|
|
46 | + void __attribute__((format(printf,1,2))) meta_printf(const char *fmt, ...); | ||
|
|
47 | + | ||
|
|
48 | + /* rules.c */ | ||
|
|
49 | + | ||
|
|
50 | + int set_env_action(char *a0); | ||
|
|
51 | + char **setup_environment(void); | ||
|
|
52 | + | ||
|
|
53 | + void init_dir_rules(void); | ||
|
|
54 | + int set_dir_action(char *arg); | ||
|
|
55 | + void apply_dir_rules(int with_defaults); | ||
|
|
56 | + | ||
|
|
57 | + void set_quota(void); | ||
|
|
58 | + | ||
|
|
59 | + /* cg.c */ | ||
|
|
60 | + | ||
|
|
61 | + void cg_init(void); | ||
|
|
62 | + void cg_prepare(void); | ||
|
|
63 | + void cg_enter(void); | ||
|
|
64 | + int cg_get_run_time_ms(void); | ||
|
|
65 | + void cg_stats(void); | ||
|
|
66 | + void cg_remove(void); | ||
|
|
67 | + | ||
|
|
68 | + /* config.c */ | ||
|
|
69 | + | ||
|
|
70 | + extern char *cf_box_root; | ||
|
|
71 | + extern char *cf_cg_root; | ||
|
|
72 | + extern char *cf_cg_parent; | ||
|
|
73 | + extern int cf_first_uid; | ||
|
|
74 | + extern int cf_first_gid; | ||
|
|
75 | + extern int cf_num_boxes; | ||
|
|
76 | + | ||
|
|
77 | + struct cf_per_box { | ||
|
|
78 | + struct cf_per_box *next; | ||
|
|
79 | + int box_id; | ||
|
|
80 | + char *cpus; | ||
|
|
81 | + char *mems; | ||
|
|
82 | + }; | ||
|
|
83 | + | ||
|
|
84 | + void cf_parse(void); | ||
|
|
85 | + struct cf_per_box *cf_per_box(int box_id); | ||
|
|
86 | + struct cf_per_box *cf_current_box(void); |
This diff has been collapsed as it changes many lines, (509 lines changed) Show them Hide them | |||||
@@ -0,0 +1,509 | |||||
|
|
1 | + /* | ||
|
|
2 | + * Process Isolator -- Rules | ||
|
|
3 | + * | ||
|
|
4 | + * (c) 2012-2018 Martin Mares <mj@ucw.cz> | ||
|
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | ||
|
|
6 | + */ | ||
|
|
7 | + | ||
|
|
8 | + #include "isolate.h" | ||
|
|
9 | + | ||
|
|
10 | + #include <limits.h> | ||
|
|
11 | + #include <mntent.h> | ||
|
|
12 | + #include <stdio.h> | ||
|
|
13 | + #include <stdlib.h> | ||
|
|
14 | + #include <string.h> | ||
|
|
15 | + #include <sys/capability.h> | ||
|
|
16 | + #include <sys/mount.h> | ||
|
|
17 | + #include <sys/quota.h> | ||
|
|
18 | + #include <sys/stat.h> | ||
|
|
19 | + #include <sys/vfs.h> | ||
|
|
20 | + #include <unistd.h> | ||
|
|
21 | + | ||
|
|
22 | + /*** Environment rules ***/ | ||
|
|
23 | + | ||
|
|
24 | + struct env_rule { | ||
|
|
25 | + char *var; // Variable to match | ||
|
|
26 | + char *val; // ""=clear, NULL=inherit | ||
|
|
27 | + int var_len; | ||
|
|
28 | + struct env_rule *next; | ||
|
|
29 | + }; | ||
|
|
30 | + | ||
|
|
31 | + static struct env_rule *first_env_rule; | ||
|
|
32 | + static struct env_rule **last_env_rule = &first_env_rule; | ||
|
|
33 | + | ||
|
|
34 | + static struct env_rule default_env_rules[] = { | ||
|
|
35 | + { .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 }, | ||
|
|
36 | + }; | ||
|
|
37 | + | ||
|
|
38 | + int | ||
|
|
39 | + set_env_action(char *a0) | ||
|
|
40 | + { | ||
|
|
41 | + struct env_rule *r = xmalloc(sizeof(*r) + strlen(a0) + 1); | ||
|
|
42 | + char *a = (char *)(r+1); | ||
|
|
43 | + strcpy(a, a0); | ||
|
|
44 | + | ||
|
|
45 | + char *sep = strchr(a, '='); | ||
|
|
46 | + if (sep == a) | ||
|
|
47 | + return 0; | ||
|
|
48 | + r->var = a; | ||
|
|
49 | + if (sep) | ||
|
|
50 | + { | ||
|
|
51 | + *sep++ = 0; | ||
|
|
52 | + r->val = sep; | ||
|
|
53 | + } | ||
|
|
54 | + else | ||
|
|
55 | + r->val = NULL; | ||
|
|
56 | + *last_env_rule = r; | ||
|
|
57 | + last_env_rule = &r->next; | ||
|
|
58 | + r->next = NULL; | ||
|
|
59 | + return 1; | ||
|
|
60 | + } | ||
|
|
61 | + | ||
|
|
62 | + static int | ||
|
|
63 | + match_env_var(char *env_entry, struct env_rule *r) | ||
|
|
64 | + { | ||
|
|
65 | + if (strncmp(env_entry, r->var, r->var_len)) | ||
|
|
66 | + return 0; | ||
|
|
67 | + return (env_entry[r->var_len] == '='); | ||
|
|
68 | + } | ||
|
|
69 | + | ||
|
|
70 | + static void | ||
|
|
71 | + apply_env_rule(char **env, int *env_sizep, struct env_rule *r) | ||
|
|
72 | + { | ||
|
|
73 | + // First remove the variable if already set | ||
|
|
74 | + int pos = 0; | ||
|
|
75 | + while (pos < *env_sizep && !match_env_var(env[pos], r)) | ||
|
|
76 | + pos++; | ||
|
|
77 | + if (pos < *env_sizep) | ||
|
|
78 | + { | ||
|
|
79 | + (*env_sizep)--; | ||
|
|
80 | + env[pos] = env[*env_sizep]; | ||
|
|
81 | + env[*env_sizep] = NULL; | ||
|
|
82 | + } | ||
|
|
83 | + | ||
|
|
84 | + // What is the new value? | ||
|
|
85 | + char *new; | ||
|
|
86 | + if (r->val) | ||
|
|
87 | + { | ||
|
|
88 | + if (!r->val[0]) | ||
|
|
89 | + return; | ||
|
|
90 | + new = xmalloc(r->var_len + 1 + strlen(r->val) + 1); | ||
|
|
91 | + sprintf(new, "%s=%s", r->var, r->val); | ||
|
|
92 | + } | ||
|
|
93 | + else | ||
|
|
94 | + { | ||
|
|
95 | + pos = 0; | ||
|
|
96 | + while (environ[pos] && !match_env_var(environ[pos], r)) | ||
|
|
97 | + pos++; | ||
|
|
98 | + if (!(new = environ[pos])) | ||
|
|
99 | + return; | ||
|
|
100 | + } | ||
|
|
101 | + | ||
|
|
102 | + // Add it at the end of the array | ||
|
|
103 | + env[(*env_sizep)++] = new; | ||
|
|
104 | + env[*env_sizep] = NULL; | ||
|
|
105 | + } | ||
|
|
106 | + | ||
|
|
107 | + char ** | ||
|
|
108 | + setup_environment(void) | ||
|
|
109 | + { | ||
|
|
110 | + // Link built-in rules with user rules | ||
|
|
111 | + for (int i=ARRAY_SIZE(default_env_rules)-1; i >= 0; i--) | ||
|
|
112 | + { | ||
|
|
113 | + default_env_rules[i].next = first_env_rule; | ||
|
|
114 | + first_env_rule = &default_env_rules[i]; | ||
|
|
115 | + } | ||
|
|
116 | + | ||
|
|
117 | + // Scan the original environment | ||
|
|
118 | + char **orig_env = environ; | ||
|
|
119 | + int orig_size = 0; | ||
|
|
120 | + while (orig_env[orig_size]) | ||
|
|
121 | + orig_size++; | ||
|
|
122 | + | ||
|
|
123 | + // For each rule, reserve one more slot and calculate length | ||
|
|
124 | + int num_rules = 0; | ||
|
|
125 | + for (struct env_rule *r = first_env_rule; r; r=r->next) | ||
|
|
126 | + { | ||
|
|
127 | + num_rules++; | ||
|
|
128 | + r->var_len = strlen(r->var); | ||
|
|
129 | + } | ||
|
|
130 | + | ||
|
|
131 | + // Create a new environment | ||
|
|
132 | + char **env = xmalloc((orig_size + num_rules + 1) * sizeof(char *)); | ||
|
|
133 | + int size; | ||
|
|
134 | + if (pass_environ) | ||
|
|
135 | + { | ||
|
|
136 | + memcpy(env, environ, orig_size * sizeof(char *)); | ||
|
|
137 | + size = orig_size; | ||
|
|
138 | + } | ||
|
|
139 | + else | ||
|
|
140 | + size = 0; | ||
|
|
141 | + env[size] = NULL; | ||
|
|
142 | + | ||
|
|
143 | + // Apply the rules one by one | ||
|
|
144 | + for (struct env_rule *r = first_env_rule; r; r=r->next) | ||
|
|
145 | + apply_env_rule(env, &size, r); | ||
|
|
146 | + | ||
|
|
147 | + // Return the new env and pass some gossip | ||
|
|
148 | + if (verbose > 1) | ||
|
|
149 | + { | ||
|
|
150 | + fprintf(stderr, "Passing environment:\n"); | ||
|
|
151 | + for (int i=0; env[i]; i++) | ||
|
|
152 | + fprintf(stderr, "\t%s\n", env[i]); | ||
|
|
153 | + } | ||
|
|
154 | + return env; | ||
|
|
155 | + } | ||
|
|
156 | + | ||
|
|
157 | + /*** Directory rules ***/ | ||
|
|
158 | + | ||
|
|
159 | + struct dir_rule { | ||
|
|
160 | + char *inside; // A relative path | ||
|
|
161 | + char *outside; // This can be an absolute path or a relative path starting with "./" | ||
|
|
162 | + unsigned int flags; // DIR_FLAG_xxx | ||
|
|
163 | + struct dir_rule *next; | ||
|
|
164 | + }; | ||
|
|
165 | + | ||
|
|
166 | + enum dir_rule_flags { | ||
|
|
167 | + DIR_FLAG_RW = 1, | ||
|
|
168 | + DIR_FLAG_NOEXEC = 2, | ||
|
|
169 | + DIR_FLAG_FS = 4, | ||
|
|
170 | + DIR_FLAG_MAYBE = 8, | ||
|
|
171 | + DIR_FLAG_DEV = 16, | ||
|
|
172 | + DIR_FLAG_DEFAULT = 1U << 15, // Used internally | ||
|
|
173 | + DIR_FLAG_DISABLED = 1U << 16, // Used internally | ||
|
|
174 | + }; | ||
|
|
175 | + | ||
|
|
176 | + static const char * const dir_flag_names[] = { "rw", "noexec", "fs", "maybe", "dev" }; | ||
|
|
177 | + | ||
|
|
178 | + static struct dir_rule *first_dir_rule; | ||
|
|
179 | + static struct dir_rule **last_dir_rule = &first_dir_rule; | ||
|
|
180 | + | ||
|
|
/*
 * Turn a path into a relative one by skipping all leading slashes and
 * make sure it contains no ".." component.
 *
 * Returns a pointer into the original string, or NULL if the path is
 * empty after stripping or contains "..".
 */
static char *
sanitize_dir_path(char *path)
{
  // Skip leading slashes
  path += strspn(path, "/");
  if (!*path)
    return NULL;

  // Walk the components one by one and refuse ".."
  for (char *comp = path; *comp; )
    {
      size_t len = strcspn(comp, "/");
      if (len == 2 && comp[0] == '.' && comp[1] == '.')
	return NULL;

      comp += len;
      if (*comp)
	comp++;
    }

  return path;
}
|
|
207 | + | ||
|
|
208 | + static int | ||
|
|
209 | + add_dir_rule(char *in, char *out, unsigned int flags) | ||
|
|
210 | + { | ||
|
|
211 | + // Make sure that "in" does not try to escape the box | ||
|
|
212 | + in = sanitize_dir_path(in); | ||
|
|
213 | + if (!in) | ||
|
|
214 | + return 0; | ||
|
|
215 | + | ||
|
|
216 | + // Check "out" | ||
|
|
217 | + if (flags & DIR_FLAG_FS) | ||
|
|
218 | + { | ||
|
|
219 | + if (!out || out[0] == '/') | ||
|
|
220 | + return 0; | ||
|
|
221 | + } | ||
|
|
222 | + else | ||
|
|
223 | + { | ||
|
|
224 | + if (out && out[0] != '/' && strncmp(out, "./", 2)) | ||
|
|
225 | + return 0; | ||
|
|
226 | + } | ||
|
|
227 | + | ||
|
|
228 | + // Override an existing rule | ||
|
|
229 | + struct dir_rule *r; | ||
|
|
230 | + for (r = first_dir_rule; r; r = r->next) | ||
|
|
231 | + if (!strcmp(r->inside, in)) | ||
|
|
232 | + break; | ||
|
|
233 | + | ||
|
|
234 | + // Add a new rule | ||
|
|
235 | + if (!r) | ||
|
|
236 | + { | ||
|
|
237 | + r = xmalloc(sizeof(*r)); | ||
|
|
238 | + r->inside = in; | ||
|
|
239 | + *last_dir_rule = r; | ||
|
|
240 | + last_dir_rule = &r->next; | ||
|
|
241 | + r->next = NULL; | ||
|
|
242 | + } | ||
|
|
243 | + r->outside = out; | ||
|
|
244 | + r->flags = flags; | ||
|
|
245 | + return 1; | ||
|
|
246 | + } | ||
|
|
247 | + | ||
|
|
248 | + static unsigned int | ||
|
|
249 | + parse_dir_option(char *opt) | ||
|
|
250 | + { | ||
|
|
251 | + for (unsigned int i = 0; i < ARRAY_SIZE(dir_flag_names); i++) | ||
|
|
252 | + if (!strcmp(opt, dir_flag_names[i])) | ||
|
|
253 | + return 1U << i; | ||
|
|
254 | + die("Unknown directory option %s", opt); | ||
|
|
255 | + } | ||
|
|
256 | + | ||
|
|
/*
 * Parse a directory rule "in[=out][:option...]" and add it with the given
 * extra flags. When "=out" is missing, the outside path is "/" followed by
 * the inside path; when "out" is empty, nothing is bound.
 *
 * The argument is copied, the caller's string is left untouched.
 * Returns the result of add_dir_rule().
 */
static int
set_dir_action_ext(char *arg, unsigned int ext_flags)
{
  char *spec = xstrdup(arg);
  unsigned int flags = ext_flags;

  // Cut off the colon-separated options one by one and collect their flags
  for (char *opt = strchr(spec, ':'); opt; )
    {
      *opt++ = 0;
      char *rest = strchr(opt, ':');
      if (rest)
	*rest = 0;
      flags |= parse_dir_option(opt);
      opt = rest;
    }

  char *eq = strchr(spec, '=');
  if (!eq)
    {
      // No explicit outside path: use the same path relative to the root
      char *out = xmalloc(1 + strlen(spec) + 1);
      sprintf(out, "/%s", spec);
      return add_dir_rule(spec, out, flags);
    }

  *eq++ = 0;
  return add_dir_rule(spec, (*eq ? eq : NULL), flags);
}
|
|
287 | + | ||
|
|
/*
 * Add a directory rule supplied by the user (no internal flags).
 * Returns 1 on success, 0 if the rule is not acceptable.
 */
int
set_dir_action(char *arg)
{
  const unsigned int no_extra_flags = 0;
  return set_dir_action_ext(arg, no_extra_flags);
}
|
|
293 | + | ||
|
|
294 | + static int | ||
|
|
295 | + set_dir_action_default(char *arg) | ||
|
|
296 | + { | ||
|
|
297 | + return set_dir_action_ext(arg, DIR_FLAG_DEFAULT); | ||
|
|
298 | + } | ||
|
|
299 | + | ||
|
|
/*
 * Install the built-in directory rules. They are registered in the order
 * listed, each marked as a default rule.
 */
void
init_dir_rules(void)
{
  static char * const default_rules[] = {
    "box=./box:rw",
    "bin",
    "dev:dev",
    "lib",
    "lib64:maybe",
    "proc=proc:fs",
    "usr",
  };

  for (int i = 0; i < ARRAY_SIZE(default_rules); i++)
    set_dir_action_default(default_rules[i]);
}
|
|
311 | + | ||
|
|
312 | + static void | ||
|
|
313 | + set_cap_sys_admin(void) | ||
|
|
314 | + { | ||
|
|
315 | + cap_t caps; | ||
|
|
316 | + if (!(caps = cap_get_proc())) | ||
|
|
317 | + die("Cannot get capabilities: %m"); | ||
|
|
318 | + | ||
|
|
319 | + cap_value_t cap_list[] = { CAP_SYS_ADMIN }; | ||
|
|
320 | + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_SET) < 0) | ||
|
|
321 | + die("Cannot modify capabilities"); | ||
|
|
322 | + | ||
|
|
323 | + if (cap_set_proc(caps) < 0) | ||
|
|
324 | + die("Cannot set capabilities: %m"); | ||
|
|
325 | + | ||
|
|
326 | + cap_free(caps); | ||
|
|
327 | + } | ||
|
|
328 | + | ||
|
|
329 | + void | ||
|
|
330 | + apply_dir_rules(int with_defaults) | ||
|
|
331 | + { | ||
|
|
332 | + /* | ||
|
|
333 | + * Before mounting anything, we create all mount points inside the box. | ||
|
|
334 | + * This is necessary to avoid bypassing directory permissions. If you | ||
|
|
335 | + * want nested binds, you have to create the mount points explicitly. | ||
|
|
336 | + */ | ||
|
|
337 | + for (struct dir_rule *r = first_dir_rule; r; r=r->next) | ||
|
|
338 | + { | ||
|
|
339 | + if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT)) | ||
|
|
340 | + continue; | ||
|
|
341 | + | ||
|
|
342 | + char *in = r->inside; | ||
|
|
343 | + char *out = r->outside; | ||
|
|
344 | + | ||
|
|
345 | + if (!out) | ||
|
|
346 | + { | ||
|
|
347 | + msg("Not binding anything on %s\n", in); | ||
|
|
348 | + r->flags |= DIR_FLAG_DISABLED; | ||
|
|
349 | + continue; | ||
|
|
350 | + } | ||
|
|
351 | + | ||
|
|
352 | + if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out)) | ||
|
|
353 | + { | ||
|
|
354 | + msg("Not binding %s on %s (does not exist)\n", out, r->inside); | ||
|
|
355 | + r->flags |= DIR_FLAG_DISABLED; | ||
|
|
356 | + continue; | ||
|
|
357 | + } | ||
|
|
358 | + | ||
|
|
359 | + char root_in[1024]; | ||
|
|
360 | + snprintf(root_in, sizeof(root_in), "root/%s", in); | ||
|
|
361 | + make_dir(root_in); | ||
|
|
362 | + } | ||
|
|
363 | + | ||
|
|
364 | + for (struct dir_rule *r = first_dir_rule; r; r=r->next) | ||
|
|
365 | + { | ||
|
|
366 | + if (r->flags & DIR_FLAG_DISABLED) | ||
|
|
367 | + continue; | ||
|
|
368 | + if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT)) | ||
|
|
369 | + continue; | ||
|
|
370 | + | ||
|
|
371 | + char *in = r->inside; | ||
|
|
372 | + char *out = r->outside; | ||
|
|
373 | + char root_in[1024]; | ||
|
|
374 | + snprintf(root_in, sizeof(root_in), "root/%s", in); | ||
|
|
375 | + | ||
|
|
376 | + unsigned long mount_flags = 0; | ||
|
|
377 | + if (!(r->flags & DIR_FLAG_RW)) | ||
|
|
378 | + mount_flags |= MS_RDONLY; | ||
|
|
379 | + if (r->flags & DIR_FLAG_NOEXEC) | ||
|
|
380 | + mount_flags |= MS_NOEXEC; | ||
|
|
381 | + if (!(r->flags & DIR_FLAG_DEV)) | ||
|
|
382 | + mount_flags |= MS_NODEV; | ||
|
|
383 | + | ||
|
|
384 | + if (r->flags & DIR_FLAG_FS) | ||
|
|
385 | + { | ||
|
|
386 | + msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags); | ||
|
|
387 | + if (mount("none", root_in, out, mount_flags, "") < 0) | ||
|
|
388 | + die("Cannot mount %s on %s: %m", out, in); | ||
|
|
389 | + if (!strcmp(in, "proc")) | ||
|
|
390 | + { | ||
|
|
391 | + // If we are mounting procfs, add hidepid=2, so that only the processes | ||
|
|
392 | + // of the same user are visible. This has to be done as a remount. | ||
|
|
393 | + if (mount("none", root_in, out, MS_REMOUNT | mount_flags, "hidepid=2") < 0) | ||
|
|
394 | + die("Cannot re-mount proc with hidepid option: %m"); | ||
|
|
395 | + } | ||
|
|
396 | + } | ||
|
|
397 | + else | ||
|
|
398 | + { | ||
|
|
399 | + mount_flags |= MS_BIND | MS_NOSUID; | ||
|
|
400 | + msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags); | ||
|
|
401 | + | ||
|
|
402 | + /* | ||
|
|
403 | + * This is tricky. We cannot run mount() with root privileges, since | ||
|
|
404 | + * it could be used to bypass access control if the mounted path | ||
|
|
405 | + * contains elements inaccessible to the user running isolate. | ||
|
|
406 | + * | ||
|
|
407 | + * We switch effective UID and GID back to the calling user (which clears | ||
|
|
408 | + * all capabilities, but keeps them in the permitted set) and then | ||
|
|
409 | + * enable CAP_SYS_ADMIN. So we have CAP_SYS_ADMIN (needed for mount), | ||
|
|
410 | + * but not CAP_DAC_OVERRIDE (which allows to bypass permission checks). | ||
|
|
411 | + */ | ||
|
|
412 | + | ||
|
|
413 | + if (setresuid(orig_uid, orig_uid, 0) < 0 || | ||
|
|
414 | + setresgid(orig_gid, orig_gid, 0) < 0) | ||
|
|
415 | + die("Cannot switch UID and GID: %m"); | ||
|
|
416 | + | ||
|
|
417 | + set_cap_sys_admin(); | ||
|
|
418 | + | ||
|
|
419 | + // Most mount flags need remount to work | ||
|
|
420 | + if (mount(out, root_in, "none", mount_flags, "") < 0 || | ||
|
|
421 | + mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0) | ||
|
|
422 | + die("Cannot mount %s on %s: %m", out, in); | ||
|
|
423 | + | ||
|
|
424 | + if (setresuid(orig_uid, 0, orig_uid) < 0 || | ||
|
|
425 | + setresgid(orig_gid, 0, orig_gid) < 0) | ||
|
|
426 | + die("Cannot switch UID and GID: %m"); | ||
|
|
427 | + } | ||
|
|
428 | + } | ||
|
|
429 | + } | ||
|
|
430 | + | ||
|
|
431 | + /*** Disk quotas ***/ | ||
|
|
432 | + | ||
|
|
/*
 * Check whether "path" lies at or below the directory "with", comparing
 * whole path components: "/dev/sda1" begins with "/dev", "/devil" does not.
 * A prefix ending with a slash (e.g., the root "/") and an empty prefix
 * match every path they are a string prefix of.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
path_begins_with(char *path, char *with)
{
  size_t len = strlen(with);
  if (strncmp(path, with, len))
    return 0;

  // The prefix already ends at a component boundary
  if (!len || with[len-1] == '/')
    return 1;

  // Otherwise the path must end here or continue with a new component
  return (!path[len] || path[len] == '/');
}
|
|
441 | + | ||
|
|
/*
 * Find the device holding the filesystem which contains the given path.
 * Scans /proc/mounts for entries whose source lies under /dev and picks
 * the one with the longest mount point that the path begins with.
 *
 * Returns a newly allocated device name, or NULL if nothing matches.
 */
static char *
find_device(char *path)
{
  FILE *mounts = setmntent("/proc/mounts", "r");
  if (!mounts)
    die("Cannot open /proc/mounts: %m");

  char *best_dev = NULL;
  int best_len = 0;
  for (struct mntent *me; (me = getmntent(mounts)) != NULL; )
    {
      // Only real devices mounted at or above the path are of interest
      if (!path_begins_with(me->mnt_fsname, "/dev") ||
	  !path_begins_with(path, me->mnt_dir))
	continue;

      // Prefer the most specific (longest) mount point
      int len = strlen(me->mnt_dir);
      if (len <= best_len)
	continue;

      best_len = len;
      free(best_dev);
      best_dev = xstrdup(me->mnt_fsname);
    }

  endmntent(mounts);
  return best_dev;
}
|
|
470 | + | ||
|
|
471 | + void | ||
|
|
472 | + set_quota(void) | ||
|
|
473 | + { | ||
|
|
474 | + if (!block_quota) | ||
|
|
475 | + return; | ||
|
|
476 | + | ||
|
|
477 | + char cwd[PATH_MAX]; | ||
|
|
478 | + if (!getcwd(cwd, sizeof(cwd))) | ||
|
|
479 | + die("getcwd: %m"); | ||
|
|
480 | + | ||
|
|
481 | + char *dev = find_device(cwd); | ||
|
|
482 | + if (!dev) | ||
|
|
483 | + die("Cannot identify filesystem which contains %s", cwd); | ||
|
|
484 | + msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev); | ||
|
|
485 | + | ||
|
|
486 | + // Sanity check | ||
|
|
487 | + struct stat dev_st, cwd_st; | ||
|
|
488 | + if (stat(dev, &dev_st) < 0) | ||
|
|
489 | + die("Cannot identify block device %s: %m", dev); | ||
|
|
490 | + if (!S_ISBLK(dev_st.st_mode)) | ||
|
|
491 | + die("Expected that %s is a block device", dev); | ||
|
|
492 | + if (stat(".", &cwd_st) < 0) | ||
|
|
493 | + die("Cannot stat cwd: %m"); | ||
|
|
494 | + if (cwd_st.st_dev != dev_st.st_rdev) | ||
|
|
495 | + die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev); | ||
|
|
496 | + | ||
|
|
497 | + struct dqblk dq = { | ||
|
|
498 | + .dqb_bhardlimit = block_quota, | ||
|
|
499 | + .dqb_bsoftlimit = block_quota, | ||
|
|
500 | + .dqb_ihardlimit = inode_quota, | ||
|
|
501 | + .dqb_isoftlimit = inode_quota, | ||
|
|
502 | + .dqb_valid = QIF_LIMITS, | ||
|
|
503 | + }; | ||
|
|
504 | + if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &dq) < 0) | ||
|
|
505 | + die("Cannot set disk quota: %m"); | ||
|
|
506 | + msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota); | ||
|
|
507 | + | ||
|
|
508 | + free(dev); | ||
|
|
509 | + } |
@@ -0,0 +1,182 | |||||
|
|
1 | + /* | ||
|
|
2 | + * Process Isolator -- Utility Functions | ||
|
|
3 | + * | ||
|
|
4 | + * (c) 2012-2017 Martin Mares <mj@ucw.cz> | ||
|
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | ||
|
|
6 | + */ | ||
|
|
7 | + | ||
|
|
8 | + #include "isolate.h" | ||
|
|
9 | + | ||
|
|
10 | + #include <dirent.h> | ||
|
|
11 | + #include <errno.h> | ||
|
|
12 | + #include <ftw.h> | ||
|
|
13 | + #include <stdio.h> | ||
|
|
14 | + #include <stdlib.h> | ||
|
|
15 | + #include <string.h> | ||
|
|
16 | + #include <sys/fsuid.h> | ||
|
|
17 | + #include <sys/stat.h> | ||
|
|
18 | + #include <unistd.h> | ||
|
|
19 | + | ||
|
|
/* Allocate size bytes of memory; dies instead of returning NULL. */
void *
xmalloc(size_t size)
{
  void *mem = malloc(size);
  if (mem == NULL)
    die("Out of memory");
  return mem;
}
|
|
28 | + | ||
|
|
/* Duplicate a string into newly allocated memory; dies instead of returning NULL. */
char *
xstrdup(char *str)
{
  char *copy = strdup(str);
  if (copy == NULL)
    die("Out of memory");
  return copy;
}
|
|
37 | + | ||
|
|
/*
 * Return 1 if the path can be stat'ed and refers to a directory
 * (symlinks are followed), 0 otherwise.
 */
int
dir_exists(char *path)
{
  struct stat st;

  if (stat(path, &st) < 0)
    return 0;
  return S_ISDIR(st.st_mode) ? 1 : 0;
}
|
|
44 | + | ||
|
|
/*
 * Create a directory including all its missing parents (like "mkdir -p").
 * The path is temporarily modified in place while the components are
 * being created and restored afterwards. Dies on failure, or if the
 * final path exists but is not a directory.
 */
void
make_dir(char *path)
{
  // Skip the leading slash of an absolute path, so that we do not try to create ""
  char *p = path;
  if (*p == '/')
    p++;

  for (;;)
    {
      // Cut the path after the current component
      char *slash = strchr(p, '/');
      if (slash)
	*slash = 0;

      if (mkdir(path, 0777) < 0 && errno != EEXIST)
	die("Cannot create directory %s: %m", path);

      if (!slash)
	break;

      // Put the slash back and continue with the next component
      *slash = '/';
      p = slash + 1;
    }

  // mkdir() reports EEXIST for anything which already exists,
  // so make sure we really ended up with a directory
  struct stat st;
  if (stat(path, &st) < 0)
    die("Cannot stat %s: %m", path);
  if (!S_ISDIR(st.st_mode))
    die("Cannot create %s: already exists, but not a directory", path);
}
|
|
72 | + | ||
|
|
73 | + | ||
|
|
74 | + static int | ||
|
|
75 | + rmtree_helper(const char *fpath, const struct stat *sb, int typeflag UNUSED, struct FTW *ftwbuf UNUSED) | ||
|
|
76 | + { | ||
|
|
77 | + if (S_ISDIR(sb->st_mode)) | ||
|
|
78 | + { | ||
|
|
79 | + if (rmdir(fpath) < 0) | ||
|
|
80 | + die("Cannot rmdir %s: %m", fpath); | ||
|
|
81 | + } | ||
|
|
82 | + else | ||
|
|
83 | + { | ||
|
|
84 | + if (unlink(fpath) < 0) | ||
|
|
85 | + die("Cannot unlink %s: %m", fpath); | ||
|
|
86 | + } | ||
|
|
87 | + return 0; | ||
|
|
88 | + } | ||
|
|
89 | + | ||
|
|
90 | + void | ||
|
|
91 | + rmtree(char *path) | ||
|
|
92 | + { | ||
|
|
93 | + nftw(path, rmtree_helper, 32, FTW_MOUNT | FTW_PHYS | FTW_DEPTH); | ||
|
|
94 | + } | ||
|
|
95 | + | ||
|
|
96 | + static uid_t chown_uid; | ||
|
|
97 | + static gid_t chown_gid; | ||
|
|
98 | + | ||
|
|
99 | + static int | ||
|
|
100 | + chowntree_helper(const char *fpath, const struct stat *sb UNUSED, int typeflag UNUSED, struct FTW *ftwbuf UNUSED) | ||
|
|
101 | + { | ||
|
|
102 | + if (lchown(fpath, chown_uid, chown_gid) < 0) | ||
|
|
103 | + die("Cannot chown %s: %m", fpath); | ||
|
|
104 | + else | ||
|
|
105 | + return 0; | ||
|
|
106 | + } | ||
|
|
107 | + | ||
|
|
108 | + void | ||
|
|
109 | + chowntree(char *path, uid_t uid, gid_t gid) | ||
|
|
110 | + { | ||
|
|
111 | + chown_uid = uid; | ||
|
|
112 | + chown_gid = gid; | ||
|
|
113 | + nftw(path, chowntree_helper, 32, FTW_MOUNT | FTW_PHYS); | ||
|
|
114 | + } | ||
|
|
115 | + | ||
|
|
116 | + static int fd_to_keep = -1; | ||
|
|
117 | + | ||
|
|
118 | + void | ||
|
|
119 | + close_all_fds(void) | ||
|
|
120 | + { | ||
|
|
121 | + /* Close all file descriptors except 0, 1, 2 */ | ||
|
|
122 | + | ||
|
|
123 | + DIR *dir = opendir("/proc/self/fd"); | ||
|
|
124 | + if (!dir) | ||
|
|
125 | + die("Cannot open /proc/self/fd: %m"); | ||
|
|
126 | + int dir_fd = dirfd(dir); | ||
|
|
127 | + | ||
|
|
128 | + struct dirent *e; | ||
|
|
129 | + while (e = readdir(dir)) | ||
|
|
130 | + { | ||
|
|
131 | + char *end; | ||
|
|
132 | + long int fd = strtol(e->d_name, &end, 10); | ||
|
|
133 | + if (*end) | ||
|
|
134 | + continue; | ||
|
|
135 | + if (fd >= 0 && fd <= 2 || fd == dir_fd || fd == fd_to_keep) | ||
|
|
136 | + continue; | ||
|
|
137 | + close(fd); | ||
|
|
138 | + } | ||
|
|
139 | + | ||
|
|
140 | + closedir(dir); | ||
|
|
141 | + } | ||
|
|
142 | + | ||
|
|
143 | + /*** Meta-files ***/ | ||
|
|
144 | + | ||
|
|
145 | + static FILE *metafile; | ||
|
|
146 | + | ||
|
|
147 | + void | ||
|
|
148 | + meta_open(const char *name) | ||
|
|
149 | + { | ||
|
|
150 | + if (!strcmp(name, "-")) | ||
|
|
151 | + { | ||
|
|
152 | + metafile = stdout; | ||
|
|
153 | + return; | ||
|
|
154 | + } | ||
|
|
155 | + if (setfsuid(getuid()) < 0) | ||
|
|
156 | + die("Failed to switch FS UID: %m"); | ||
|
|
157 | + metafile = fopen(name, "w"); | ||
|
|
158 | + if (setfsuid(geteuid()) < 0) | ||
|
|
159 | + die("Failed to switch FS UID back: %m"); | ||
|
|
160 | + if (!metafile) | ||
|
|
161 | + die("Failed to open metafile '%s'",name); | ||
|
|
162 | + fd_to_keep = fileno(metafile); | ||
|
|
163 | + } | ||
|
|
164 | + | ||
|
|
165 | + void | ||
|
|
166 | + meta_close(void) | ||
|
|
167 | + { | ||
|
|
168 | + if (metafile && metafile != stdout) | ||
|
|
169 | + fclose(metafile); | ||
|
|
170 | + } | ||
|
|
171 | + | ||
|
|
172 | + void | ||
|
|
173 | + meta_printf(const char *fmt, ...) | ||
|
|
174 | + { | ||
|
|
175 | + if (!metafile) | ||
|
|
176 | + return; | ||
|
|
177 | + | ||
|
|
178 | + va_list args; | ||
|
|
179 | + va_start(args, fmt); | ||
|
|
180 | + vfprintf(metafile, fmt, args); | ||
|
|
181 | + va_end(args); | ||
|
|
182 | + } |
@@ -77,6 +77,8 | |||||
|
77 | raise "engine: No test data." |
|
77 | raise "engine: No test data." |
|
78 | end |
|
78 | end |
|
79 |
|
79 | ||
|
|
80 | + talk "ENGINE: grading dir at #{grading_dir} is created" | ||
|
|
81 | + | ||
|
80 | # copy the source script, using lock |
|
82 | # copy the source script, using lock |
|
81 | dinit = DirInit::Manager.new(problem_home) |
|
83 | dinit = DirInit::Manager.new(problem_home) |
|
82 |
|
84 | ||
@@ -84,8 +86,10 | |||||
|
84 | dinit.setup do |
|
86 | dinit.setup do |
|
85 | copy_log = copy_script(problem_home) |
|
87 | copy_log = copy_script(problem_home) |
|
86 | save_copy_log(problem_home,copy_log) |
|
88 | save_copy_log(problem_home,copy_log) |
|
|
89 | + talk "ENGINE: following std script is copied: #{copy_log.join ' '}" | ||
|
87 | end |
|
90 | end |
|
88 |
|
91 | ||
|
|
92 | + | ||
|
89 | call_judge(problem_home,language,grading_dir,source_name) |
|
93 | call_judge(problem_home,language,grading_dir,source_name) |
|
90 |
|
94 | ||
|
91 | @reporter.report(submission,"#{grading_dir}/test-result") |
|
95 | @reporter.report(submission,"#{grading_dir}/test-result") |
@@ -121,11 +125,10 | |||||
|
121 | ENV['PROBLEM_HOME'] = problem_home |
|
125 | ENV['PROBLEM_HOME'] = problem_home |
|
122 | ENV['RUBYOPT'] = '' |
|
126 | ENV['RUBYOPT'] = '' |
|
123 |
|
127 | ||
|
124 | - talk grading_dir |
|
||
|
125 | Dir.chdir grading_dir |
|
128 | Dir.chdir grading_dir |
|
126 | script_name = "#{problem_home}/script/judge" |
|
129 | script_name = "#{problem_home}/script/judge" |
|
127 | cmd = "#{script_name} #{language} #{fname}" |
|
130 | cmd = "#{script_name} #{language} #{fname}" |
|
128 |
- talk " |
|
131 | + talk "ENGINE: Calling Judge at #{cmd}" |
|
129 | warn "ERROR: file does not exists #{script_name}" unless File.exists? script_name |
|
132 | warn "ERROR: file does not exists #{script_name}" unless File.exists? script_name |
|
130 | system(cmd) |
|
133 | system(cmd) |
|
131 | end |
|
134 | end |
@@ -1,6 +1,6 | |||||
|
1 | # |
|
1 | # |
|
2 | # A runner drives the engine into various tasks. |
|
2 | # A runner drives the engine into various tasks. |
|
3 |
- # |
|
3 | + # |
|
4 |
|
4 | ||
|
5 | module Grader |
|
5 | module Grader |
|
6 |
|
6 | ||
@@ -15,7 +15,7 | |||||
|
15 | task = Task.get_inqueue_and_change_status(Task::STATUS_GRADING) |
|
15 | task = Task.get_inqueue_and_change_status(Task::STATUS_GRADING) |
|
16 | if task!=nil |
|
16 | if task!=nil |
|
17 | @grader_process.report_active(task) if @grader_process!=nil |
|
17 | @grader_process.report_active(task) if @grader_process!=nil |
|
18 | - |
|
18 | + |
|
19 | submission = Submission.find(task.submission_id) |
|
19 | submission = Submission.find(task.submission_id) |
|
20 | @engine.grade(submission) |
|
20 | @engine.grade(submission) |
|
21 | task.status_complete! |
|
21 | task.status_complete! |
@@ -52,7 +52,7 | |||||
|
52 | end |
|
52 | end |
|
53 |
|
53 | ||
|
54 | def grade_submission(submission) |
|
54 | def grade_submission(submission) |
|
55 |
- puts " |
|
55 | + puts "RUNNER: grade submission: #{submission.id} by #{submission.try(:user).try(:full_name)}" |
|
56 | @engine.grade(submission) |
|
56 | @engine.grade(submission) |
|
57 | end |
|
57 | end |
|
58 |
|
58 | ||
@@ -60,7 +60,7 | |||||
|
60 | test_request = TestRequest.get_inqueue_and_change_status(Task::STATUS_GRADING) |
|
60 | test_request = TestRequest.get_inqueue_and_change_status(Task::STATUS_GRADING) |
|
61 | if test_request!=nil |
|
61 | if test_request!=nil |
|
62 | @grader_process.report_active(test_request) if @grader_process!=nil |
|
62 | @grader_process.report_active(test_request) if @grader_process!=nil |
|
63 | - |
|
63 | + |
|
64 | @engine.grade(test_request) |
|
64 | @engine.grade(test_request) |
|
65 | test_request.status_complete! |
|
65 | test_request.status_complete! |
|
66 | @grader_process.report_inactive(test_request) if @grader_process!=nil |
|
66 | @grader_process.report_inactive(test_request) if @grader_process!=nil |
@@ -42,7 +42,7 | |||||
|
42 | def report(sub,test_result_dir) |
|
42 | def report(sub,test_result_dir) |
|
43 | result = read_result(test_result_dir) |
|
43 | result = read_result(test_result_dir) |
|
44 | if @result_collector |
|
44 | if @result_collector |
|
45 |
- @result_collector.save(sub, |
|
45 | + @result_collector.save(sub, |
|
46 | result) |
|
46 | result) |
|
47 | end |
|
47 | end |
|
48 | save_result(sub,result) |
|
48 | save_result(sub,result) |
@@ -67,8 +67,9 | |||||
|
67 | else |
|
67 | else |
|
68 | params[param_name] = default |
|
68 | params[param_name] = default |
|
69 | end |
|
69 | end |
|
70 | - talk "#{param_name}: #{params[param_name]}" |
|
70 | + talk "COMPILE: param: #{param_name}: #{params[param_name]}" |
|
71 | end |
|
71 | end |
|
|
72 | + talk "COMPILE: working dir = " + Dir.pwd | ||
|
72 |
|
73 | ||
|
73 | # Remove any remaining output files or message files. |
|
74 | # Remove any remaining output files or message files. |
|
74 | if FileTest.exists? params[:output_file] |
|
75 | if FileTest.exists? params[:output_file] |
@@ -80,7 +81,7 | |||||
|
80 |
|
81 | ||
|
81 | # Check if the source file exists before attempting to compile. |

82 | # Check if the source file exists before attempting to compile. |
|
82 | if !FileTest.exists? params[:source_file] |
|
83 | if !FileTest.exists? params[:source_file] |
|
83 | - talk("ERROR: The source file does not exist!") |
|
84 | + talk("COMPILE: ERROR: The source file does not exist!") |
|
84 | open(params[:message_file],"w") do |f| |
|
85 | open(params[:message_file],"w") do |f| |
|
85 | f.puts "ERROR: The source file did not exist." |
|
86 | f.puts "ERROR: The source file did not exist." |
|
86 | end |
|
87 | end |
@@ -91,19 +92,23 | |||||
|
91 | params[:prog_lang] = 'c++' |
|
92 | params[:prog_lang] = 'c++' |
|
92 | end |
|
93 | end |
|
93 |
|
94 | ||
|
|
95 | + | ||
|
94 | # Compile. |
|
96 | # Compile. |
|
95 | case params[:prog_lang] |
|
97 | case params[:prog_lang] |
|
96 |
|
98 | ||
|
97 | when "c" |
|
99 | when "c" |
|
98 | command = "#{C_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{C_OPTIONS}" |
|
100 | command = "#{C_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{C_OPTIONS}" |
|
|
101 | + talk "COMPILE: compiling command [#{command}]" | ||
|
99 | system(command, err: params[:message_file]) |
|
102 | system(command, err: params[:message_file]) |
|
100 |
|
103 | ||
|
101 | when "c++" |
|
104 | when "c++" |
|
102 | command = "#{CPLUSPLUS_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{CPLUSPLUS_OPTIONS}" |
|
105 | command = "#{CPLUSPLUS_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{CPLUSPLUS_OPTIONS}" |
|
|
106 | + talk "COMPILE: compiling command [#{command}]" | ||
|
103 | system(command, err: params[:message_file]) |
|
107 | system(command, err: params[:message_file]) |
|
104 |
|
108 | ||
|
105 | when "pas" |
|
109 | when "pas" |
|
106 | command = "#{PASCAL_COMPILER} #{params[:source_file]} -ooutpas #{PASCAL_OPTIONS}" |
|
110 | command = "#{PASCAL_COMPILER} #{params[:source_file]} -ooutpas #{PASCAL_OPTIONS}" |
|
|
111 | + talk "COMPILE: compiling command [#{command}]" | ||
|
107 | system(command,out: params[:message_file]) |
|
112 | system(command,out: params[:message_file]) |
|
108 | FileUtils.mv("output", params[:output_file]) |
|
113 | FileUtils.mv("output", params[:output_file]) |
|
109 |
|
114 | ||
@@ -126,6 +131,7 | |||||
|
126 | end |
|
131 | end |
|
127 | #system("cp #{params[:source_file]} #{classname}.java") |
|
132 | #system("cp #{params[:source_file]} #{classname}.java") |
|
128 | command = "#{JAVA_COMPILER} -encoding utf8 #{classname}.java" |
|
133 | command = "#{JAVA_COMPILER} -encoding utf8 #{classname}.java" |
|
|
134 | + talk "COMPILE: compiling command [#{command}]" | ||
|
129 | system(command, err: params[:message_file]) |
|
135 | system(command, err: params[:message_file]) |
|
130 | if File.exists?(classname + ".class") |
|
136 | if File.exists?(classname + ".class") |
|
131 | File.open(params[:output_file],"w") {|file| file.write("#{classname}")} |
|
137 | File.open(params[:output_file],"w") {|file| file.write("#{classname}")} |
@@ -136,6 +142,7 | |||||
|
136 |
|
142 | ||
|
137 | when "ruby" |
|
143 | when "ruby" |
|
138 | command = "#{RUBY_INTERPRETER} -c #{params[:source_file]}" |
|
144 | command = "#{RUBY_INTERPRETER} -c #{params[:source_file]}" |
|
|
145 | + talk "COMPILE: compiling command [#{command}]" | ||
|
139 | if system(command, err: params[:message_file]) |
|
146 | if system(command, err: params[:message_file]) |
|
140 | File.open(params[:output_file],"w") do |out_file| |
|
147 | File.open(params[:output_file],"w") do |out_file| |
|
141 | out_file.puts "#!#{RUBY_INTERPRETER}" |
|
148 | out_file.puts "#!#{RUBY_INTERPRETER}" |
@@ -151,11 +158,9 | |||||
|
151 | #if system(command, out: params[:message_file]) |
|
158 | #if system(command, out: params[:message_file]) |
|
152 | #compile to python bytecode |
|
159 | #compile to python bytecode |
|
153 | command = "#{PYTHON_INTERPRETER} -c \"import py_compile; py_compile.compile('#{params[:source_file]}','#{params[:source_file]}c');\"" |
|
160 | command = "#{PYTHON_INTERPRETER} -c \"import py_compile; py_compile.compile('#{params[:source_file]}','#{params[:source_file]}c');\"" |
|
154 |
- |
|
161 | + talk "COMPILE: compiling command [#{command}]" |
|
155 | system(command, err: params[:message_file]) |
|
162 | system(command, err: params[:message_file]) |
|
156 | if FileTest.exists?("#{params[:source_file]}c") |
|
163 | if FileTest.exists?("#{params[:source_file]}c") |
|
157 | - puts "pwd: " + Dir.pwd |
|
||
|
158 | - Dir.new('.').each {|file| puts file} |
|
||
|
159 | File.open(params[:output_file],"w") do |out_file| |
|
164 | File.open(params[:output_file],"w") do |out_file| |
|
160 | out_file.puts "#!#{PYTHON_INTERPRETER} #{params[:source_file]}c" |
|
165 | out_file.puts "#!#{PYTHON_INTERPRETER} #{params[:source_file]}c" |
|
161 | end |
|
166 | end |
@@ -178,10 +183,11 | |||||
|
178 |
|
183 | ||
|
179 | when "haskell" |
|
184 | when "haskell" |
|
180 | command = "#{HASKELL_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{HASKELL_OPTIONS}" |
|
185 | command = "#{HASKELL_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{HASKELL_OPTIONS}" |
|
|
186 | + talk "COMPILE: compiling command [#{command}]" | ||
|
181 | system(command, err: params[:message_file]) |
|
187 | system(command, err: params[:message_file]) |
|
182 |
|
188 | ||
|
183 | else |
|
189 | else |
|
184 | - talk("ERROR: Invalid language specified!") |
|
190 | + talk("COMPILE: ERROR: Invalid language specified!") |
|
185 | open(params[:message_file],"w") do |f| |
|
191 | open(params[:message_file],"w") do |f| |
|
186 | f.puts "ERROR: Invalid language specified!" |
|
192 | f.puts "ERROR: Invalid language specified!" |
|
187 | end |
|
193 | end |
@@ -190,7 +196,7 | |||||
|
190 |
|
196 | ||
|
191 | # Report success or failure. |
|
197 | # Report success or failure. |
|
192 | if FileTest.exists? params[:output_file] |
|
198 | if FileTest.exists? params[:output_file] |
|
193 |
- talk "Compilation was successful!" |
|
199 | + talk "COMPILE: Compilation was successful!" |
|
194 | else |
|
200 | else |
|
195 | - talk "ERROR: Something was wrong during the compilation!" |
|
201 | + talk "COMPILE: ERROR: Something was wrong during the compilation!" |
|
196 | end |
|
202 | end |
@@ -28,7 +28,7 | |||||
|
28 | begin |
|
28 | begin |
|
29 | yield |
|
29 | yield |
|
30 | rescue |
|
30 | rescue |
|
31 | - msg = "ERROR: #{error_message}" |
|
31 | + msg = "JUDGE: ERROR: #{error_message}" |
|
32 | log msg |
|
32 | log msg |
|
33 | raise msg |
|
33 | raise msg |
|
34 | end |
|
34 | end |
@@ -54,18 +54,18 | |||||
|
54 |
|
54 | ||
|
55 | language = ARGV[0] |
|
55 | language = ARGV[0] |
|
56 | if language != "c" && language != "c++" && language != "pas" && language != "java" && language != "ruby" && language != "python" && language != "php" && language != "haskell" |
|
56 | if language != "c" && language != "c++" && language != "pas" && language != "java" && language != "ruby" && language != "python" && language != "php" && language != "haskell" |
|
57 | - log "You specified a language that is not supported: #{language}." |
|
57 | + log "JUDGE: You specified a language that is not supported: #{language}." |
|
58 | exit(127) |
|
58 | exit(127) |
|
59 | end |
|
59 | end |
|
60 |
|
60 | ||
|
61 | source_file = ARGV[1] |
|
61 | source_file = ARGV[1] |
|
62 | ENV['SOURCE_NAME'] = source_file |
|
62 | ENV['SOURCE_NAME'] = source_file |
|
63 | if File.exist?(source_file) == false |
|
63 | if File.exist?(source_file) == false |
|
64 | - log "The source file does not exist." |
|
64 | + log "JUDGE: The source file does not exist." |
|
65 | exit(127) |
|
65 | exit(127) |
|
66 | end |
|
66 | end |
|
67 |
|
67 | ||
|
68 | - log "Making test result and sandbox directories..." |
|
68 | + log "JUDGE: Making test result and sandbox directories..." |
|
69 |
|
69 | ||
|
70 | current_dir = FileUtils.pwd |
|
70 | current_dir = FileUtils.pwd |
|
71 | current_dir.strip! |
|
71 | current_dir.strip! |
@@ -76,7 +76,7 | |||||
|
76 | test_result_dir = "#{current_dir}/test-result" |
|
76 | test_result_dir = "#{current_dir}/test-result" |
|
77 | end |
|
77 | end |
|
78 |
|
78 | ||
|
79 | - log "Test result directory: #{test_result_dir}" |
|
79 | + log "JUDGE: Test result directory: #{test_result_dir}" |
|
80 | clear_and_create_empty_dir(test_result_dir) |
|
80 | clear_and_create_empty_dir(test_result_dir) |
|
81 |
|
81 | ||
|
82 | if ARGV.length >= 4 |
|
82 | if ARGV.length >= 4 |
@@ -84,19 +84,21 | |||||
|
84 | else |
|
84 | else |
|
85 | sandbox_dir = "#{current_dir}/sandbox" |
|
85 | sandbox_dir = "#{current_dir}/sandbox" |
|
86 | end |
|
86 | end |
|
87 | - log "Sandbox directory: #{sandbox_dir}" |
|
87 | + log "JUDGE: Sandbox directory: #{sandbox_dir}" |
|
88 | clear_and_create_empty_dir(sandbox_dir) |
|
88 | clear_and_create_empty_dir(sandbox_dir) |
|
89 |
|
89 | ||
|
|
90 | + # ------------------------------ | ||
|
90 | # Compile |
|
91 | # Compile |
|
|
92 | + # ------------------------------ | ||
|
|
93 | + log "JUDGE: Compiling..." | ||
|
91 | log |
|
94 | log |
|
92 | - log "Compiling..." |
|
||
|
93 | call_and_log("Cannot copy the source file to #{sandbox_dir}") { |
|
95 | call_and_log("Cannot copy the source file to #{sandbox_dir}") { |
|
94 | FileUtils.cp(source_file, sandbox_dir) |
|
96 | FileUtils.cp(source_file, sandbox_dir) |
|
95 | } |
|
97 | } |
|
96 | begin |
|
98 | begin |
|
97 | Dir.chdir sandbox_dir |
|
99 | Dir.chdir sandbox_dir |
|
98 | rescue |
|
100 | rescue |
|
99 | - log "ERROR: Cannot change directory to #{sandbox_dir}." |
|
101 | + log "JUDGE: ERROR: Cannot change directory to #{sandbox_dir}." |
|
100 | exit(127) |
|
102 | exit(127) |
|
101 | end |
|
103 | end |
|
102 | execute("#{problem_home}/script/compile #{language} #{source_file}", "Compilation error!") |
|
104 | execute("#{problem_home}/script/compile #{language} #{source_file}", "Compilation error!") |
@@ -106,7 +108,7 | |||||
|
106 | FileUtils.mv("compiler_message", test_result_dir) |
|
108 | FileUtils.mv("compiler_message", test_result_dir) |
|
107 | } |
|
109 | } |
|
108 | if !FileTest.exist?("a.out") |
|
110 | if !FileTest.exist?("a.out") |
|
109 | - log "Cannot compile the source code. See message in #{test_result_dir}/compile_message" |
|
111 | + log "JUDGE: EROOR: Cannot compile the source code. See message in #{test_result_dir}/compile_message" |
|
110 | exit(127) |
|
112 | exit(127) |
|
111 | else |
|
113 | else |
|
112 | call_and_log("Cannot move the compiled program to #{test_result_dir}") { |
|
114 | call_and_log("Cannot move the compiled program to #{test_result_dir}") { |
@@ -117,6 +119,10 | |||||
|
117 | FileUtils.rm_rf("#{sandbox_dir}/.") |
|
119 | FileUtils.rm_rf("#{sandbox_dir}/.") |
|
118 | end |
|
120 | end |
|
119 |
|
121 | ||
|
|
122 | + | ||
|
|
123 | + #----------------------------------------------- | ||
|
|
124 | + # run | ||
|
|
125 | + #----------------------------------------------- | ||
|
120 | require "#{problem_home}/script/test_dsl.rb" |
|
126 | require "#{problem_home}/script/test_dsl.rb" |
|
121 | load "#{problem_home}/test_cases/all_tests.cfg" |
|
127 | load "#{problem_home}/test_cases/all_tests.cfg" |
|
122 | problem = Problem.get_instance |
|
128 | problem = Problem.get_instance |
@@ -127,13 +133,13 | |||||
|
127 | end |
|
133 | end |
|
128 |
|
134 | ||
|
129 | # Doing the testing. |
|
135 | # Doing the testing. |
|
|
136 | + log | ||
|
|
137 | + log "JUDGE: Running each test case..." | ||
|
130 | (1..(problem.num_tests)).each do |test_num| |
|
138 | (1..(problem.num_tests)).each do |test_num| |
|
131 |
|
139 | ||
|
132 | $stdout.print "[#{test_num}]" |
|
140 | $stdout.print "[#{test_num}]" |
|
133 | $stdout.flush |
|
141 | $stdout.flush |
|
134 |
|
142 | ||
|
135 | - log "Test number: #{test_num}" |
|
||
|
136 | - |
|
||
|
137 | call_and_log("Cannot copy the compiled program into #{sandbox_dir}") { |
|
143 | call_and_log("Cannot copy the compiled program into #{sandbox_dir}") { |
|
138 | FileUtils.cp("#{test_result_dir}/a.out", sandbox_dir, :preserve => true) |
|
144 | FileUtils.cp("#{test_result_dir}/a.out", sandbox_dir, :preserve => true) |
|
139 | if language == "java" then Dir["#{test_result_dir}/*.class"].each { |file| FileUtils.cp(file,sandbox_dir)} end |
|
145 | if language == "java" then Dir["#{test_result_dir}/*.class"].each { |file| FileUtils.cp(file,sandbox_dir)} end |
@@ -173,7 +179,7 | |||||
|
173 |
|
179 | ||
|
174 | # Grade |
|
180 | # Grade |
|
175 | log |
|
181 | log |
|
176 | - log "Grading..." |
|
182 | + log "JUDGE: Grading..." |
|
177 | begin |
|
183 | begin |
|
178 | Dir.chdir test_result_dir |
|
184 | Dir.chdir test_result_dir |
|
179 | rescue |
|
185 | rescue |
@@ -51,13 +51,13 | |||||
|
51 | sandbox_dir = Dir.getwd |
|
51 | sandbox_dir = Dir.getwd |
|
52 |
|
52 | ||
|
53 | if problem.well_formed? == false |
|
53 | if problem.well_formed? == false |
|
54 | - log "The problem specification is not well formed." |
|
54 | + log "RUN: The problem specification is not well formed." |
|
55 | exit(127) |
|
55 | exit(127) |
|
56 | end |
|
56 | end |
|
57 |
|
57 | ||
|
58 | # Check if the test number is okay. |
|
58 | # Check if the test number is okay. |
|
59 | if test_num <= 0 || test_num > problem.num_tests |
|
59 | if test_num <= 0 || test_num > problem.num_tests |
|
60 | - log "You have specified a wrong test number." |
|
60 | + log "RUN: You have specified a wrong test number." |
|
61 | exit(127) |
|
61 | exit(127) |
|
62 | end |
|
62 | end |
|
63 |
|
63 | ||
@@ -119,8 +119,8 | |||||
|
119 | end |
|
119 | end |
|
120 |
|
120 | ||
|
121 |
|
121 | ||
|
122 | - log "Running test #{test_num}..." |
|
122 | + log "RUN: Running test #{test_num}..." |
|
123 | - log run_command |
|
123 | + log "RUN: Run command = [#{run_command}]" |
|
124 | log |
|
124 | log |
|
125 | system(run_command,err: 'run_result') |
|
125 | system(run_command,err: 'run_result') |
|
126 |
|
126 |
You need to be logged in to leave comments.
Login now