Description:
- add isolate
- more comment and output for each script
Commit status:
[Not Reviewed]
References:
Diff options:
Comments:
0 Commit comments
0 Inline Comments
Unresolved TODOs:
There are no unresolved TODOs
r256:78af34fd4a2e - - 20 files changed: 3163 inserted, 27 deleted
@@ -0,0 +1,17 | |||
|
1 | + language: c | |
|
2 | + | |
|
3 | + compiler: gcc | |
|
4 | + | |
|
5 | + addons: | |
|
6 | + apt: | |
|
7 | + packages: | |
|
8 | + - asciidoc | |
|
9 | + - libcap-dev | |
|
10 | + - libxml2-utils | |
|
11 | + - xsltproc | |
|
12 | + - docbook-xml | |
|
13 | + - docbook-xsl | |
|
14 | + | |
|
15 | + script: | |
|
16 | + - make DESTDIR=/tmp/isolate | |
|
17 | + - make DESTDIR=/tmp/isolate install |
@@ -0,0 +1,12 | |||
|
1 | + Isolate is free software: you can redistribute it and/or modify | |
|
2 | + it under the terms of the GNU General Public License as published by | |
|
3 | + the Free Software Foundation, either version 2 of the License, or | |
|
4 | + (at your option) any later version. | |
|
5 | + | |
|
6 | + This program is distributed in the hope that it will be useful, | |
|
7 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
|
8 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
|
9 | + GNU General Public License for more details. | |
|
10 | + | |
|
11 | + If you have less than 10 copies of the GPL on your system :-), | |
|
12 | + you can find it at http://www.gnu.org/licenses/. |
@@ -0,0 +1,67 | |||
|
1 | + # Makefile for Isolate | |
|
2 | + # (c) 2015--2018 Martin Mares <mj@ucw.cz> | |
|
3 | + # (c) 2017 Bernard Blackham <bernard@blackham.com.au> | |
|
4 | + | |
|
5 | + all: isolate isolate.1 isolate.1.html isolate-check-environment | |
|
6 | + | |
|
7 | + CC=gcc | |
|
8 | + CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -Wstrict-prototypes -Wmissing-prototypes -D_GNU_SOURCE | |
|
9 | + LIBS=-lcap | |
|
10 | + | |
|
11 | + VERSION=1.5 | |
|
12 | + YEAR=2018 | |
|
13 | + BUILD_DATE:=$(shell date '+%Y-%m-%d') | |
|
14 | + BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describe --always --tags ; else echo '<unknown>' ; fi) | |
|
15 | + | |
|
16 | + PREFIX = $(DESTDIR)/usr/local | |
|
17 | + VARPREFIX = $(DESTDIR)/var/local | |
|
18 | + CONFIGDIR = $(PREFIX)/etc | |
|
19 | + CONFIG = $(CONFIGDIR)/isolate | |
|
20 | + BINDIR = $(PREFIX)/bin | |
|
21 | + DATAROOTDIR = $(PREFIX)/share | |
|
22 | + DATADIR = $(DATAROOTDIR) | |
|
23 | + MANDIR = $(DATADIR)/man | |
|
24 | + MAN1DIR = $(MANDIR)/man1 | |
|
25 | + BOXDIR = $(VARPREFIX)/lib/isolate | |
|
26 | + | |
|
27 | + isolate: isolate.o util.o rules.o cg.o config.o | |
|
28 | + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) | |
|
29 | + | |
|
30 | + %.o: %.c isolate.h config.h | |
|
31 | + $(CC) $(CFLAGS) -c -o $@ $< | |
|
32 | + | |
|
33 | + isolate.o: CFLAGS += -DVERSION='"$(VERSION)"' -DYEAR='"$(YEAR)"' -DBUILD_DATE='"$(BUILD_DATE)"' -DBUILD_COMMIT='"$(BUILD_COMMIT)"' | |
|
34 | + config.o: CFLAGS += -DCONFIG_FILE='"$(CONFIG)"' | |
|
35 | + | |
|
36 | + isolate.1: isolate.1.txt | |
|
37 | + a2x -f manpage $< | |
|
38 | + | |
|
39 | + # The dependency on isolate.1 is there to serialize both calls of asciidoc, | |
|
40 | + # which does not name temporary files safely. | |
|
41 | + isolate.1.html: isolate.1.txt isolate.1 | |
|
42 | + a2x -f xhtml -D . $< | |
|
43 | + | |
|
44 | + clean: | |
|
45 | + rm -f *.o | |
|
46 | + rm -f isolate isolate.1 isolate.1.html | |
|
47 | + rm -f docbook-xsl.css | |
|
48 | + | |
|
49 | + install: isolate isolate-check-environment | |
|
50 | + install -d $(BINDIR) $(BOXDIR) $(CONFIGDIR) | |
|
51 | + install isolate-check-environment $(BINDIR) | |
|
52 | + install -m 4755 isolate $(BINDIR) | |
|
53 | + install -m 644 default.cf $(CONFIG) | |
|
54 | + | |
|
55 | + install-doc: isolate.1 | |
|
56 | + install -d $(MAN1DIR) | |
|
57 | + install -m 644 $< $(MAN1DIR)/$< | |
|
58 | + | |
|
59 | + release: isolate.1.html | |
|
60 | + git tag v$(VERSION) | |
|
61 | + git push --tags | |
|
62 | + git archive --format=tar --prefix=isolate-$(VERSION)/ HEAD | gzip >isolate-$(VERSION).tar.gz | |
|
63 | + rsync isolate-$(VERSION).tar.gz atrey:ftp/isolate/ | |
|
64 | + rsync isolate.1.html jw:/var/www/moe/ | |
|
65 | + ssh jw 'cd web && bin/release-prog isolate $(VERSION)' | |
|
66 | + | |
|
67 | + .PHONY: all clean install install-doc release |
@@ -0,0 +1,29 | |||
|
1 | + isolate | |
|
2 | + ======= | |
|
3 | + | |
|
4 | + Isolate is a sandbox built to safely run untrusted executables, | |
|
5 | + offering them a limited-access environment and preventing them from | |
|
6 | + affecting the host system. It takes advantage of features specific to | |
|
7 | + the Linux kernel, like namespaces and control groups. | |
|
8 | + | |
|
9 | + Isolate was developed by Martin Mareš (<mj@ucw.cz>) and Bernard Blackham | |
|
10 | + (<bernard@blackham.com.au>), who still maintain it. Several other people | |
|
11 | + contributed patches for features and bug fixes (see Git history for a list). | |
|
12 | + Thanks! | |
|
13 | + | |
|
14 | + Originally, Isolate was a part of the [Moe Contest Environment](http://www.ucw.cz/moe/), | |
|
15 | + but it evolved into a separate project used by different | |
|
16 | + contest systems, most prominently [CMS](https://github.com/cms-dev/cms). | |
|
17 | + It now lives at [GitHub](https://github.com/ioi/isolate), | |
|
18 | + where you can submit bug reports and feature requests. | |
|
19 | + | |
|
20 | + If you are interested in more details, please read Martin's | |
|
21 | + and Bernard's [paper](http://mj.ucw.cz/papers/isolate.pdf) presented | |
|
22 | + at the IOI Conference. Also, Isolate's [manual page](http://www.ucw.cz/moe/isolate.1.html) | |
|
23 | + is available online. | |
|
24 | + | |
|
25 | + To compile Isolate, you need the headers for the libcap library | |
|
26 | + (usually available in a libcap-dev package). | |
|
27 | + | |
|
28 | + You may need `a2x` (found in [AsciiDoc](http://www.methods.co.nz/asciidoc/a2x.1.html)) for building the manual. | |
|
29 | + But if you only want the isolate binary, you can just run `make isolate`. |
@@ -0,0 +1,1 | |||
|
1 | + Examine the use of taskstats for measuring memory |
@@ -0,0 +1,327 | |||
|
1 | + /* | |
|
2 | + * Process Isolator -- Control Groups | |
|
3 | + * | |
|
4 | + * (c) 2012-2016 Martin Mares <mj@ucw.cz> | |
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | |
|
6 | + */ | |
|
7 | + | |
|
8 | + #include "isolate.h" | |
|
9 | + | |
|
10 | + #include <assert.h> | |
|
11 | + #include <errno.h> | |
|
12 | + #include <fcntl.h> | |
|
13 | + #include <stdio.h> | |
|
14 | + #include <stdlib.h> | |
|
15 | + #include <string.h> | |
|
16 | + #include <sys/stat.h> | |
|
17 | + #include <unistd.h> | |
|
18 | + | |
|
19 | + struct cg_controller_desc { | |
|
20 | + const char *name; | |
|
21 | + int optional; | |
|
22 | + }; | |
|
23 | + | |
|
24 | + typedef enum { | |
|
25 | + CG_MEMORY = 0, | |
|
26 | + CG_CPUACCT, | |
|
27 | + CG_CPUSET, | |
|
28 | + CG_NUM_CONTROLLERS, | |
|
29 | + CG_PARENT = 256, | |
|
30 | + } cg_controller; | |
|
31 | + | |
|
32 | + static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = { | |
|
33 | + [CG_MEMORY] = { "memory", 0 }, | |
|
34 | + [CG_CPUACCT] = { "cpuacct", 0 }, | |
|
35 | + [CG_CPUSET] = { "cpuset", 1 }, | |
|
36 | + [CG_NUM_CONTROLLERS] = { NULL, 0 }, | |
|
37 | + }; | |
|
38 | + | |
|
39 | + #define FOREACH_CG_CONTROLLER(_controller) \ | |
|
40 | + for (cg_controller (_controller) = 0; \ | |
|
41 | + (_controller) < CG_NUM_CONTROLLERS; (_controller)++) | |
|
42 | + | |
|
43 | + static const char * | |
|
44 | + cg_controller_name(cg_controller c) | |
|
45 | + { | |
|
46 | + assert(c < CG_NUM_CONTROLLERS); | |
|
47 | + return cg_controllers[c].name; | |
|
48 | + } | |
|
49 | + | |
|
50 | + static int | |
|
51 | + cg_controller_optional(cg_controller c) | |
|
52 | + { | |
|
53 | + assert(c < CG_NUM_CONTROLLERS); | |
|
54 | + return cg_controllers[c].optional; | |
|
55 | + } | |
|
56 | + | |
|
57 | + static char cg_name[256]; | |
|
58 | + static char cg_parent_name[256]; | |
|
59 | + | |
|
60 | + #define CG_BUFSIZE 1024 | |
|
61 | + | |
|
62 | + static void | |
|
63 | + cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) | |
|
64 | + { | |
|
65 | + snprintf(buf, len, "%s/%s/%s/%s", | |
|
66 | + cf_cg_root, | |
|
67 | + cg_controller_name(c & ~CG_PARENT), | |
|
68 | + (c & CG_PARENT) ? cg_parent_name : cg_name, | |
|
69 | + attr); | |
|
70 | + } | |
|
71 | + | |
|
72 | + static int | |
|
73 | + cg_read(cg_controller controller, const char *attr, char *buf) | |
|
74 | + { | |
|
75 | + int result = 0; | |
|
76 | + int maybe = 0; | |
|
77 | + if (attr[0] == '?') | |
|
78 | + { | |
|
79 | + attr++; | |
|
80 | + maybe = 1; | |
|
81 | + } | |
|
82 | + | |
|
83 | + char path[256]; | |
|
84 | + cg_makepath(path, sizeof(path), controller, attr); | |
|
85 | + | |
|
86 | + int fd = open(path, O_RDONLY); | |
|
87 | + if (fd < 0) | |
|
88 | + { | |
|
89 | + if (maybe) | |
|
90 | + goto fail; | |
|
91 | + die("Cannot read %s: %m", path); | |
|
92 | + } | |
|
93 | + | |
|
94 | + int n = read(fd, buf, CG_BUFSIZE); | |
|
95 | + if (n < 0) | |
|
96 | + { | |
|
97 | + if (maybe) | |
|
98 | + goto fail_close; | |
|
99 | + die("Cannot read %s: %m", path); | |
|
100 | + } | |
|
101 | + if (n >= CG_BUFSIZE - 1) | |
|
102 | + die("Attribute %s too long", path); | |
|
103 | + if (n > 0 && buf[n-1] == '\n') | |
|
104 | + n--; | |
|
105 | + buf[n] = 0; | |
|
106 | + | |
|
107 | + if (verbose > 1) | |
|
108 | + msg("CG: Read %s = <%s>\n", attr, buf); | |
|
109 | + | |
|
110 | + result = 1; | |
|
111 | + fail_close: | |
|
112 | + close(fd); | |
|
113 | + fail: | |
|
114 | + return result; | |
|
115 | + } | |
|
116 | + | |
|
117 | + static void __attribute__((format(printf,3,4))) | |
|
118 | + cg_write(cg_controller controller, const char *attr, const char *fmt, ...) | |
|
119 | + { | |
|
120 | + int maybe = 0; | |
|
121 | + if (attr[0] == '?') | |
|
122 | + { | |
|
123 | + attr++; | |
|
124 | + maybe = 1; | |
|
125 | + } | |
|
126 | + | |
|
127 | + va_list args; | |
|
128 | + va_start(args, fmt); | |
|
129 | + | |
|
130 | + char buf[CG_BUFSIZE]; | |
|
131 | + int n = vsnprintf(buf, sizeof(buf), fmt, args); | |
|
132 | + if (n >= CG_BUFSIZE) | |
|
133 | + die("cg_write: Value for attribute %s is too long", attr); | |
|
134 | + | |
|
135 | + if (verbose > 1) | |
|
136 | + msg("CG: Write %s = %s", attr, buf); | |
|
137 | + | |
|
138 | + char path[256]; | |
|
139 | + cg_makepath(path, sizeof(path), controller, attr); | |
|
140 | + | |
|
141 | + int fd = open(path, O_WRONLY | O_TRUNC); | |
|
142 | + if (fd < 0) | |
|
143 | + { | |
|
144 | + if (maybe) | |
|
145 | + goto fail; | |
|
146 | + else | |
|
147 | + die("Cannot write %s: %m", path); | |
|
148 | + } | |
|
149 | + | |
|
150 | + int written = write(fd, buf, n); | |
|
151 | + if (written < 0) | |
|
152 | + { | |
|
153 | + if (maybe) | |
|
154 | + goto fail_close; | |
|
155 | + else | |
|
156 | + die("Cannot set %s to %s: %m", path, buf); | |
|
157 | + } | |
|
158 | + if (written != n) | |
|
159 | + die("Short write to %s (%d out of %d bytes)", path, written, n); | |
|
160 | + | |
|
161 | + fail_close: | |
|
162 | + close(fd); | |
|
163 | + fail: | |
|
164 | + va_end(args); | |
|
165 | + } | |
|
166 | + | |
|
167 | + void | |
|
168 | + cg_init(void) | |
|
169 | + { | |
|
170 | + if (!cg_enable) | |
|
171 | + return; | |
|
172 | + | |
|
173 | + if (!dir_exists(cf_cg_root)) | |
|
174 | + die("Control group filesystem at %s not mounted", cf_cg_root); | |
|
175 | + | |
|
176 | + if (cf_cg_parent) | |
|
177 | + { | |
|
178 | + snprintf(cg_name, sizeof(cg_name), "%s/box-%d", cf_cg_parent, box_id); | |
|
179 | + snprintf(cg_parent_name, sizeof(cg_parent_name), "%s", cf_cg_parent); | |
|
180 | + } | |
|
181 | + else | |
|
182 | + { | |
|
183 | + snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); | |
|
184 | + strcpy(cg_parent_name, "."); | |
|
185 | + } | |
|
186 | + msg("Using control group %s under parent %s\n", cg_name, cg_parent_name); | |
|
187 | + } | |
|
188 | + | |
|
189 | + void | |
|
190 | + cg_prepare(void) | |
|
191 | + { | |
|
192 | + if (!cg_enable) | |
|
193 | + return; | |
|
194 | + | |
|
195 | + struct stat st; | |
|
196 | + char buf[CG_BUFSIZE]; | |
|
197 | + char path[256]; | |
|
198 | + | |
|
199 | + FOREACH_CG_CONTROLLER(controller) | |
|
200 | + { | |
|
201 | + cg_makepath(path, sizeof(path), controller, ""); | |
|
202 | + if (stat(path, &st) >= 0 || errno != ENOENT) | |
|
203 | + { | |
|
204 | + msg("Control group %s already exists, trying to empty it.\n", path); | |
|
205 | + if (rmdir(path) < 0) | |
|
206 | + die("Failed to reset control group %s: %m", path); | |
|
207 | + } | |
|
208 | + | |
|
209 | + if (mkdir(path, 0777) < 0 && !cg_controller_optional(controller)) | |
|
210 | + die("Failed to create control group %s: %m", path); | |
|
211 | + } | |
|
212 | + | |
|
213 | + // If the cpuset module is enabled, set up allowed cpus and memory nodes. | |
|
214 | + // If per-box configuration exists, use it; otherwise, inherit the settings | |
|
215 | + // from the parent cgroup. | |
|
216 | + struct cf_per_box *cf = cf_current_box(); | |
|
217 | + if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.cpus", buf)) | |
|
218 | + cg_write(CG_CPUSET, "cpuset.cpus", "%s", cf->cpus ? cf->cpus : buf); | |
|
219 | + if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.mems", buf)) | |
|
220 | + cg_write(CG_CPUSET, "cpuset.mems", "%s", cf->mems ? cf->mems : buf); | |
|
221 | + } | |
|
222 | + | |
|
223 | + void | |
|
224 | + cg_enter(void) | |
|
225 | + { | |
|
226 | + if (!cg_enable) | |
|
227 | + return; | |
|
228 | + | |
|
229 | + msg("Entering control group %s\n", cg_name); | |
|
230 | + | |
|
231 | + FOREACH_CG_CONTROLLER(controller) | |
|
232 | + { | |
|
233 | + if (cg_controller_optional(controller)) | |
|
234 | + cg_write(controller, "?tasks", "%d\n", (int) getpid()); | |
|
235 | + else | |
|
236 | + cg_write(controller, "tasks", "%d\n", (int) getpid()); | |
|
237 | + } | |
|
238 | + | |
|
239 | + if (cg_memory_limit) | |
|
240 | + { | |
|
241 | + cg_write(CG_MEMORY, "memory.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); | |
|
242 | + cg_write(CG_MEMORY, "?memory.memsw.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); | |
|
243 | + cg_write(CG_MEMORY, "memory.max_usage_in_bytes", "0\n"); | |
|
244 | + cg_write(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", "0\n"); | |
|
245 | + } | |
|
246 | + | |
|
247 | + if (cg_timing) | |
|
248 | + cg_write(CG_CPUACCT, "cpuacct.usage", "0\n"); | |
|
249 | + } | |
|
250 | + | |
|
251 | + int | |
|
252 | + cg_get_run_time_ms(void) | |
|
253 | + { | |
|
254 | + if (!cg_enable) | |
|
255 | + return 0; | |
|
256 | + | |
|
257 | + char buf[CG_BUFSIZE]; | |
|
258 | + cg_read(CG_CPUACCT, "cpuacct.usage", buf); | |
|
259 | + unsigned long long ns = atoll(buf); | |
|
260 | + return ns / 1000000; | |
|
261 | + } | |
|
262 | + | |
|
263 | + void | |
|
264 | + cg_stats(void) | |
|
265 | + { | |
|
266 | + if (!cg_enable) | |
|
267 | + return; | |
|
268 | + | |
|
269 | + char buf[CG_BUFSIZE]; | |
|
270 | + | |
|
271 | + // Memory usage statistics | |
|
272 | + unsigned long long mem=0, memsw=0; | |
|
273 | + if (cg_read(CG_MEMORY, "?memory.max_usage_in_bytes", buf)) | |
|
274 | + mem = atoll(buf); | |
|
275 | + if (cg_read(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", buf)) | |
|
276 | + { | |
|
277 | + memsw = atoll(buf); | |
|
278 | + if (memsw > mem) | |
|
279 | + mem = memsw; | |
|
280 | + } | |
|
281 | + if (mem) | |
|
282 | + meta_printf("cg-mem:%lld\n", mem >> 10); | |
|
283 | + | |
|
284 | + // OOM kill detection | |
|
285 | + if (cg_read(CG_MEMORY, "?memory.oom_control", buf)) | |
|
286 | + { | |
|
287 | + int oom_killed = 0; | |
|
288 | + char *s = buf; | |
|
289 | + while (s) | |
|
290 | + { | |
|
291 | + if (sscanf(s, "oom_kill %d", &oom_killed) == 1 && oom_killed) | |
|
292 | + { | |
|
293 | + meta_printf("cg-oom-killed:1\n"); | |
|
294 | + break; | |
|
295 | + } | |
|
296 | + s = strchr(s, '\n'); | |
|
297 | + if (s) | |
|
298 | + s++; | |
|
299 | + } | |
|
300 | + } | |
|
301 | + } | |
|
302 | + | |
|
303 | + void | |
|
304 | + cg_remove(void) | |
|
305 | + { | |
|
306 | + char buf[CG_BUFSIZE]; | |
|
307 | + | |
|
308 | + if (!cg_enable) | |
|
309 | + return; | |
|
310 | + | |
|
311 | + FOREACH_CG_CONTROLLER(controller) | |
|
312 | + { | |
|
313 | + // The cgroup can be non-existent at this moment (e.g., --cleanup before the first --init) | |
|
314 | + if (!cg_read(controller, "?tasks", buf)) | |
|
315 | + continue; | |
|
316 | + | |
|
317 | + if (buf[0]) | |
|
318 | + die("Some tasks left in controller %s of cgroup %s, failed to remove it", | |
|
319 | + cg_controller_name(controller), cg_name); | |
|
320 | + | |
|
321 | + char path[256]; | |
|
322 | + cg_makepath(path, sizeof(path), controller, ""); | |
|
323 | + | |
|
324 | + if (rmdir(path) < 0) | |
|
325 | + die("Cannot remove control group %s: %m", path); | |
|
326 | + } | |
|
327 | + } |
@@ -0,0 +1,168 | |||
|
1 | + /* | |
|
2 | + * Process Isolator -- Configuration File | |
|
3 | + * | |
|
4 | + * (c) 2016 Martin Mares <mj@ucw.cz> | |
|
5 | + */ | |
|
6 | + | |
|
7 | + #include "isolate.h" | |
|
8 | + | |
|
9 | + #include <errno.h> | |
|
10 | + #include <stdio.h> | |
|
11 | + #include <stdlib.h> | |
|
12 | + #include <string.h> | |
|
13 | + | |
|
14 | + #define MAX_LINE_LEN 1024 | |
|
15 | + | |
|
16 | + char *cf_box_root; | |
|
17 | + char *cf_cg_root; | |
|
18 | + char *cf_cg_parent; | |
|
19 | + int cf_first_uid; | |
|
20 | + int cf_first_gid; | |
|
21 | + int cf_num_boxes; | |
|
22 | + | |
|
23 | + static int line_number; | |
|
24 | + static struct cf_per_box *per_box_configs; | |
|
25 | + | |
|
26 | + static void NONRET | |
|
27 | + cf_err(char *msg) | |
|
28 | + { | |
|
29 | + die("Error in config file, line %d: %s", line_number, msg); | |
|
30 | + } | |
|
31 | + | |
|
32 | + static char * | |
|
33 | + cf_string(char *val) | |
|
34 | + { | |
|
35 | + return xstrdup(val); | |
|
36 | + } | |
|
37 | + | |
|
38 | + static int | |
|
39 | + cf_int(char *val) | |
|
40 | + { | |
|
41 | + char *end; | |
|
42 | + errno = 0; | |
|
43 | + long int x = strtol(val, &end, 10); | |
|
44 | + if (errno || end == val || end && *end) | |
|
45 | + cf_err("Invalid number"); | |
|
46 | + if ((long int)(int) x != x) | |
|
47 | + cf_err("Number out of range"); | |
|
48 | + return x; | |
|
49 | + } | |
|
50 | + | |
|
51 | + static void | |
|
52 | + cf_entry_toplevel(char *key, char *val) | |
|
53 | + { | |
|
54 | + if (!strcmp(key, "box_root")) | |
|
55 | + cf_box_root = cf_string(val); | |
|
56 | + else if (!strcmp(key, "cg_root")) | |
|
57 | + cf_cg_root = cf_string(val); | |
|
58 | + else if (!strcmp(key, "cg_parent")) | |
|
59 | + cf_cg_parent = cf_string(val); | |
|
60 | + else if (!strcmp(key, "first_uid")) | |
|
61 | + cf_first_uid = cf_int(val); | |
|
62 | + else if (!strcmp(key, "first_gid")) | |
|
63 | + cf_first_gid = cf_int(val); | |
|
64 | + else if (!strcmp(key, "num_boxes")) | |
|
65 | + cf_num_boxes = cf_int(val); | |
|
66 | + else | |
|
67 | + cf_err("Unknown configuration item"); | |
|
68 | + } | |
|
69 | + | |
|
70 | + static void | |
|
71 | + cf_entry_compound(char *key, char *subkey, char *val) | |
|
72 | + { | |
|
73 | + if (strncmp(key, "box", 3)) | |
|
74 | + cf_err("Unknown configuration section"); | |
|
75 | + int box_id = cf_int(key + 3); | |
|
76 | + struct cf_per_box *c = cf_per_box(box_id); | |
|
77 | + | |
|
78 | + if (!strcmp(subkey, "cpus")) | |
|
79 | + c->cpus = cf_string(val); | |
|
80 | + else if (!strcmp(subkey, "mems")) | |
|
81 | + c->mems = cf_string(val); | |
|
82 | + else | |
|
83 | + cf_err("Unknown per-box configuration item"); | |
|
84 | + } | |
|
85 | + | |
|
86 | + static void | |
|
87 | + cf_entry(char *key, char *val) | |
|
88 | + { | |
|
89 | + char *dot = strchr(key, '.'); | |
|
90 | + if (!dot) | |
|
91 | + cf_entry_toplevel(key, val); | |
|
92 | + else | |
|
93 | + { | |
|
94 | + *dot++ = 0; | |
|
95 | + cf_entry_compound(key, dot, val); | |
|
96 | + } | |
|
97 | + } | |
|
98 | + | |
|
99 | + static void | |
|
100 | + cf_check(void) | |
|
101 | + { | |
|
102 | + if (!cf_box_root || | |
|
103 | + !cf_cg_root || | |
|
104 | + !cf_first_uid || | |
|
105 | + !cf_first_gid || | |
|
106 | + !cf_num_boxes) | |
|
107 | + cf_err("Configuration is not complete"); | |
|
108 | + } | |
|
109 | + | |
|
110 | + void | |
|
111 | + cf_parse(void) | |
|
112 | + { | |
|
113 | + FILE *f = fopen(CONFIG_FILE, "r"); | |
|
114 | + if (!f) | |
|
115 | + die("Cannot open %s: %m", CONFIG_FILE); | |
|
116 | + | |
|
117 | + char line[MAX_LINE_LEN]; | |
|
118 | + while (fgets(line, sizeof(line), f)) | |
|
119 | + { | |
|
120 | + line_number++; | |
|
121 | + char *nl = strchr(line, '\n'); | |
|
122 | + if (!nl) | |
|
123 | + cf_err("Line not terminated or too long"); | |
|
124 | + *nl = 0; | |
|
125 | + | |
|
126 | + if (!line[0] || line[0] == '#') | |
|
127 | + continue; | |
|
128 | + | |
|
129 | + char *s = line; | |
|
130 | + while (*s && *s != ' ' && *s != '\t' && *s != '=') | |
|
131 | + s++; | |
|
132 | + while (*s == ' ' || *s == '\t') | |
|
133 | + *s++ = 0; | |
|
134 | + if (*s != '=') | |
|
135 | + cf_err("Syntax error, expecting key=value"); | |
|
136 | + *s++ = 0; | |
|
137 | + while (*s == ' ' || *s == '\t') | |
|
138 | + *s++ = 0; | |
|
139 | + | |
|
140 | + cf_entry(line, s); | |
|
141 | + } | |
|
142 | + | |
|
143 | + fclose(f); | |
|
144 | + cf_check(); | |
|
145 | + } | |
|
146 | + | |
|
147 | + struct cf_per_box * | |
|
148 | + cf_per_box(int box_id) | |
|
149 | + { | |
|
150 | + struct cf_per_box *c; | |
|
151 | + | |
|
152 | + for (c = per_box_configs; c; c = c->next) | |
|
153 | + if (c->box_id == box_id) | |
|
154 | + return c; | |
|
155 | + | |
|
156 | + c = xmalloc(sizeof(*c)); | |
|
157 | + memset(c, 0, sizeof(*c)); | |
|
158 | + c->next = per_box_configs; | |
|
159 | + per_box_configs = c; | |
|
160 | + c->box_id = box_id; | |
|
161 | + return c; | |
|
162 | + } | |
|
163 | + | |
|
164 | + struct cf_per_box * | |
|
165 | + cf_current_box(void) | |
|
166 | + { | |
|
167 | + return cf_per_box(box_id); | |
|
168 | + } |
@@ -0,0 +1,24 | |||
|
1 | + # This is a configuration file for Isolate | |
|
2 | + | |
|
3 | + # All sandboxes are created under this directory. | |
|
4 | + # To avoid symlink attacks, this directory and all its ancestors | |
|
5 | + # must be writable only by root. | |
|
6 | + box_root = /var/local/lib/isolate | |
|
7 | + | |
|
8 | + # Root of the control group hierarchy | |
|
9 | + cg_root = /sys/fs/cgroup | |
|
10 | + | |
|
11 | + # If the following variable is defined, the per-box cgroups | |
|
12 | + # are created as sub-groups of the named cgroup | |
|
13 | + #cg_parent = boxes | |
|
14 | + | |
|
15 | + # Block of UIDs and GIDs reserved for sandboxes | |
|
16 | + first_uid = 60000 | |
|
17 | + first_gid = 60000 | |
|
18 | + num_boxes = 1000 | |
|
19 | + | |
|
20 | + # Per-box settings of the set of allowed CPUs and NUMA nodes | |
|
21 | + # (see linux/Documentation/cgroups/cpusets.txt for precise syntax) | |
|
22 | + | |
|
23 | + #box0.cpus = 4-7 | |
|
24 | + #box0.mems = 1 |
@@ -0,0 +1,224 | |||
|
1 | + #!/bin/sh | |
|
2 | + # | |
|
3 | + # Identifies potential sources of issues when using isolate. | |
|
4 | + # | |
|
5 | + # (c) 2017 Bernard Blackham <bernard@blackham.com.au> | |
|
6 | + # | |
|
7 | + | |
|
8 | + usage() { | |
|
9 | + cat <<EOT >&2 | |
|
10 | + Usage: $0 [-q|--quiet] [-e|--execute] | |
|
11 | + | |
|
12 | + Use this script to identify sources of run-time variability and other issues on | |
|
13 | + Linux machines which may affect isolate. If --execute is not specified, the | |
|
14 | + recommended actions are written to stdout as an executable shell script, | |
|
15 | + otherwise, using --execute will attempt to make changes to make the system | |
|
16 | + behave more deterministically. The changes performed by --execute persist only | |
|
17 | + until a reboot. To persist across reboots, the standard output from this script | |
|
18 | + should be added to /etc/rc.local or some other script that is run on each boot. | |
|
19 | + Alternately, you could add the following line to /etc/rc.local to automatically | |
|
20 | + apply these changes on boot, but use this with caution as not all issues can | |
|
21 | + be resolved in this way. | |
|
22 | + | |
|
23 | + isolate-check-environment --quiet --execute | |
|
24 | + | |
|
25 | + The exit status of this script will be 0 if all checks pass, or 1 if some | |
|
26 | + checks have failed. | |
|
27 | + | |
|
28 | + Note that there are more strategies to reduce run-time variability further. | |
|
29 | + See the man page of isolate for details under REPRODUCIBILITY. | |
|
30 | + EOT | |
|
31 | + exit 2 | |
|
32 | + } | |
|
33 | + | |
|
34 | + # Parse options. | |
|
35 | + args=$(getopt -o "ehq" --long "execute,help,quiet" -- "$@") || usage | |
|
36 | + eval set -- "$args" | |
|
37 | + quiet= | |
|
38 | + execute= | |
|
39 | + while : ; do | |
|
40 | + case "$1" in | |
|
41 | + -q|--quiet) quiet=1 ; shift ;; | |
|
42 | + -e|--execute) execute=1 ; shift ;; | |
|
43 | + -h|--help) usage ;; | |
|
44 | + --) shift ; break ;; | |
|
45 | + *) usage ;; | |
|
46 | + esac | |
|
47 | + done | |
|
48 | + [ -n "$*" ] && usage | |
|
49 | + | |
|
50 | + # Some helper boilerplate machinery. | |
|
51 | + exit_status=0 | |
|
52 | + red=$(tput setaf 1) | |
|
53 | + green=$(tput setaf 2) | |
|
54 | + yellow=$(tput setaf 3) | |
|
55 | + normal=$(tput sgr0) | |
|
56 | + | |
|
57 | + # Return true (0) if we are being quiet. | |
|
58 | + quiet() { | |
|
59 | + [ -n "$quiet" ] | |
|
60 | + } | |
|
61 | + | |
|
62 | + # Print all arguments to stderr as warning. | |
|
63 | + warn() { | |
|
64 | + quiet || echo WARNING: "$*" >&2 | |
|
65 | + } | |
|
66 | + | |
|
67 | + # Print first argument to stderr as warning, and second argument to stdout as | |
|
68 | + # the recommended remedial action, or execute if --execute is given. | |
|
69 | + action() { | |
|
70 | + quiet || warn "$1" | |
|
71 | + if [ -n "$execute" ] ; then | |
|
72 | + quiet || echo "+ $2" | |
|
73 | + sh -c "$2" | |
|
74 | + else | |
|
75 | + quiet || echo $2 | |
|
76 | + fi | |
|
77 | + } | |
|
78 | + | |
|
79 | + print_start_check() { | |
|
80 | + quiet && return | |
|
81 | + print_check_status=1 | |
|
82 | + echo -n "Checking for $@ ... " >&2 | |
|
83 | + } | |
|
84 | + | |
|
85 | + print_fail() { | |
|
86 | + exit_status=1 | |
|
87 | + quiet && return | |
|
88 | + [ -n "$print_check_status" ] && echo "${red}FAIL${normal}" >&2 | |
|
89 | + print_check_status= | |
|
90 | + } | |
|
91 | + | |
|
92 | + print_dubious() { | |
|
93 | + exit_status=1 | |
|
94 | + quiet && return | |
|
95 | + [ -n "$print_check_status" ] && echo "${yellow}CAUTION${normal}" >&2 | |
|
96 | + print_check_status= | |
|
97 | + } | |
|
98 | + | |
|
99 | + print_skipped() { | |
|
100 | + quiet && return | |
|
101 | + [ -n "$print_check_status" ] && echo "SKIPPED (not detected)" >&2 | |
|
102 | + print_check_status= | |
|
103 | + } | |
|
104 | + | |
|
105 | + print_finish() { | |
|
106 | + quiet && return | |
|
107 | + [ -n "$print_check_status" ] && echo "${green}PASS${normal}" >&2 | |
|
108 | + print_check_status= | |
|
109 | + } | |
|
110 | + | |
|
111 | + # Check that cgroups are enabled. | |
|
112 | + cgroup_check() { | |
|
113 | + local cgroup=$1 | |
|
114 | + print_start_check "cgroup support for $cgroup" | |
|
115 | + if ! test -f "/sys/fs/cgroup/$cgroup/tasks" ; then | |
|
116 | + print_dubious | |
|
117 | + warn "the $cgroup is not present. isolate --cg cannot be used." | |
|
118 | + fi | |
|
119 | + print_finish | |
|
120 | + } | |
|
121 | + cgroup_check memory | |
|
122 | + cgroup_check cpuacct | |
|
123 | + cgroup_check cpuset | |
|
124 | + | |
|
125 | + # Check that swap is either disabled or accounted for. | |
|
126 | + swap_check() { | |
|
127 | + print_start_check "swap" | |
|
128 | + # If swap is disabled, there is nothing to worry about. | |
|
129 | + local swaps | |
|
130 | + swaps=$(swapon --noheadings) | |
|
131 | + if [ -n "$swaps" ] ; then | |
|
132 | + # Swap is enabled. We had better have the memsw support in the memory | |
|
133 | + # cgroup. | |
|
134 | + if ! test -f "/sys/fs/cgroup/memory/memory.memsw.usage_in_bytes" ; then | |
|
135 | + print_fail | |
|
136 | + action \ | |
|
137 | + "swap is enabled, but swap accounting is not. isolate will not be able to enforce memory limits." \ | |
|
138 | + "swapoff -a" | |
|
139 | + else | |
|
140 | + print_dubious | |
|
141 | + warn "swap is enabled, and although accounted for, may still give run-time variability under memory pressure." | |
|
142 | + fi | |
|
143 | + fi | |
|
144 | + print_finish | |
|
145 | + } | |
|
146 | + swap_check | |
|
147 | + | |
|
148 | + # Check that CPU frequency scaling is disabled. | |
|
149 | + cpufreq_check() { | |
|
150 | + print_start_check "CPU frequency scaling" | |
|
151 | + local anycpus policy | |
|
152 | + anycpus= | |
|
153 | + # Ensure cpufreq governor is set to performance on all CPUs | |
|
154 | + for cpufreq_file in $(find /sys/devices/system/cpu/cpufreq/ -name scaling_governor) ; do | |
|
155 | + policy=$(cat $cpufreq_file) | |
|
156 | + if [ "$policy" != "performance" ] ; then | |
|
157 | + print_fail | |
|
158 | + action \ | |
|
159 | + "cpufreq governor set to '$policy', but 'performance' would be better" \ | |
|
160 | + "echo performance > $cpufreq_file" | |
|
161 | + fi | |
|
162 | + anycpus=1 | |
|
163 | + done | |
|
164 | + [ -z "$anycpus" ] && print_skipped | |
|
165 | + print_finish | |
|
166 | + } | |
|
167 | + cpufreq_check | |
|
168 | + | |
|
169 | + # Check that address space layout randomisation is disabled. | |
|
170 | + aslr_check() { | |
|
171 | + print_start_check "kernel address space randomisation" | |
|
172 | + local val | |
|
173 | + if val=$(cat /proc/sys/kernel/randomize_va_space 2>/dev/null) ; then | |
|
174 | + if [ "$val" -ne 0 ] ; then | |
|
175 | + print_fail | |
|
176 | + action \ | |
|
177 | + "address space randomisation is enabled." \ | |
|
178 | + "echo 0 > /proc/sys/kernel/randomize_va_space" | |
|
179 | + fi | |
|
180 | + else | |
|
181 | + print_skipped | |
|
182 | + fi | |
|
183 | + print_finish | |
|
184 | + } | |
|
185 | + aslr_check | |
|
186 | + | |
|
187 | + # Check that transparent huge-pages are disabled, as this leads to | |
|
188 | + # non-determinism depending on whether the kernel can allocate 2 MiB pages or | |
|
189 | + # not. | |
|
190 | + thp_check() { | |
|
191 | + print_start_check "transparent hugepage support" | |
|
192 | + local val | |
|
193 | + if val=$(cat /sys/kernel/mm/transparent_hugepage/enabled 2>/dev/null) ; then | |
|
194 | + case $val in | |
|
195 | + *'[never]'*) ;; | |
|
196 | + *) print_fail | |
|
197 | + action \ | |
|
198 | + "transparent hugepages are enabled." \ | |
|
199 | + "echo never > /sys/kernel/mm/transparent_hugepage/enabled" ;; | |
|
200 | + esac | |
|
201 | + fi | |
|
202 | + if val=$(cat /sys/kernel/mm/transparent_hugepage/defrag 2>/dev/null) ; then | |
|
203 | + case $val in | |
|
204 | + *'[never]'*) ;; | |
|
205 | + *) print_fail | |
|
206 | + action \ | |
|
207 | + "transparent hugepage defrag is enabled." \ | |
|
208 | + "echo never > /sys/kernel/mm/transparent_hugepage/defrag" ;; | |
|
209 | + esac | |
|
210 | + fi | |
|
211 | + if val=$(cat /sys/kernel/mm/transparent_hugepage/khugepaged/defrag 2>/dev/null) ; then | |
|
212 | + if [ "$val" -ne 0 ] ; then | |
|
213 | + print_fail | |
|
214 | + action \ | |
|
215 | + "khugepaged defrag is enabled." \ | |
|
216 | + "echo 0 > /sys/kernel/mm/transparent_hugepage/khugepaged/defrag" | |
|
217 | + fi | |
|
218 | + fi | |
|
219 | + print_finish | |
|
220 | + } | |
|
221 | + thp_check | |
|
222 | + | |
|
223 | + | |
|
224 | + exit $exit_status |
@@ -0,0 +1,348 | |||
|
1 | + ISOLATE(1) | |
|
2 | + ========== | |
|
3 | + | |
|
4 | + NAME | |
|
5 | + ---- | |
|
6 | + isolate - Isolate a process using Linux Containers | |
|
7 | + | |
|
8 | + SYNOPSIS | |
|
9 | + -------- | |
|
10 | + *isolate* 'options' *--init* | |
|
11 | + | |
|
12 | + *isolate* 'options' *--run* +--+ 'program' 'arguments' | |
|
13 | + | |
|
14 | + *isolate* 'options' *--cleanup* | |
|
15 | + | |
|
16 | + DESCRIPTION | |
|
17 | + ----------- | |
|
18 | + Run 'program' within a sandbox, so that it cannot communicate with the | |
|
19 | + outside world and its resource consumption is limited. This can be used | |
|
20 | + for example in a programming contest to run untrusted programs submitted | |
|
21 | + by contestants in a controlled environment. | |
|
22 | + | |
|
23 | + The sandbox is used in the following way: | |
|
24 | + | |
|
25 | + * Run *isolate --init*, which initializes the sandbox, creates its working directory and | |
|
26 | + prints its name to the standard output. Fails if the sandbox already existed. | |
|
27 | + | |
|
28 | + * Populate the directory with the executable file of the program and its | |
|
29 | + input files. | |
|
30 | + | |
|
31 | + * Call *isolate --run* to run the program. A single line describing the | |
|
32 | + status of the program is written to the standard error stream. | |
|
33 | + | |
|
34 | + * Fetch the output of the program from the directory. | |
|
35 | + | |
|
36 | + * Run *isolate --cleanup* to remove temporary files. Does nothing if the sandbox | |
|
37 | + was already cleaned up. | |
|
38 | + | |
|
39 | + Please note that by default, the program is not allowed to start multiple | |
|
40 | + processes or threads. If you need that, turn on the control group mode | |
|
41 | + (see below). | |
|
42 | + | |
|
43 | + OPTIONS | |
|
44 | + ------- | |
|
45 | + *-M, --meta=*'file':: | |
|
46 | + Output meta-data on the execution of the program to a given file. | |
|
47 | + See below for syntax of the meta-files. | |
|
48 | + | |
|
49 | + *-m, --mem=*'size':: | |
|
50 | + Limit address space of the program to 'size' kilobytes. If more processes | |
|
51 | + are allowed, this applies to each of them separately. | |
|
52 | + | |
|
53 | + *-t, --time=*'time':: | |
|
54 | + Limit run time of the program to 'time' seconds. Fractional numbers are allowed. | |
|
55 | + Time in which the OS assigns the processor to different tasks is not counted. | |
|
56 | + | |
|
57 | + *-w, --wall-time=*'time':: | |
|
58 | + Limit wall-clock time to 'time' seconds. Fractional values are allowed. | |
|
59 | + This clock measures the time from the start of the program to its exit, | |
|
60 | + so it does not stop when the program has lost the CPU or when it is waiting | |
|
61 | + for an external event. We recommend using *--time* as the main limit, | |
|
62 | + but set *--wall-time* to a much higher value as a precaution against | |
|
63 | + sleeping programs. | |
|
64 | + | |
|
65 | + *-x, --extra-time=*'time':: | |
|
66 | + When a time limit is exceeded, wait for extra 'time' seconds before | |
|
67 | + killing the program. This has the advantage that the real execution time | |
|
68 | + is reported, even though it slightly exceeds the limit. Fractional | |
|
69 | + numbers are again allowed. | |
|
70 | + | |
|
71 | + *-b, --box-id=*'id':: | |
|
72 | + When you run multiple sandboxes in parallel, you have to assign a unique | |
|
73 | + ID to each of them by this option. See the discussion on UIDs in the INSTALLATION | |
|
74 | + section. The ID defaults to 0. | |
|
75 | + | |
|
76 | + *-k, --stack=*'size':: | |
|
77 | + Limit process stack to 'size' kilobytes. By default, the whole address | |
|
78 | + space is available for the stack, but it is subject to the *--mem* limit. | |
|
79 | + | |
|
80 | + *-f, --fsize=*'size':: | |
|
81 | + Limit size of files created (or modified) by the program to 'size' kilobytes. | |
|
82 | + In most cases, it is better to restrict overall disk usage by a disk quota | |
|
83 | + (see below). This option can help in cases when quotas are not enabled | |
|
84 | + on the underlying filesystem. | |
|
85 | + | |
|
86 | + *-q, --quota=*'blocks'*,*'inodes':: | |
|
87 | + Set disk quota to a given number of blocks and inodes. This requires the | |
|
88 | + filesystem to be mounted with support for quotas. Please note that this | |
|
89 | + currently works only on the ext family of filesystems (other filesystems | |
|
90 | + use other interfaces for setting quotas). | |
|
91 | + | |
|
92 | + *-i, --stdin=*'file':: | |
|
93 | + Redirect standard input from 'file'. The 'file' has to be accessible | |
|
94 | + inside the sandbox. Otherwise, standard input is inherited from the | |
|
95 | + parent process. | |
|
96 | + | |
|
97 | + *-o, --stdout=*'file':: | |
|
98 | + Redirect standard output to 'file'. The 'file' has to be accessible | |
|
99 | + inside the sandbox. Otherwise, standard output is inherited from the | |
|
100 | + parent process and the sandbox manager does not write anything to it. | |
|
101 | + | |
|
102 | + *-r, --stderr=*'file':: | |
|
103 | + Redirect standard error output to 'file'. The 'file' has to be accessible | |
|
104 | + inside the sandbox. Otherwise, standard error output is inherited from the | |
|
105 | + parent process. See also *--stderr-to-stdout*. | |
|
106 | + | |
|
107 | + *--stderr-to-stdout*:: | |
|
108 | + Redirect standard error output to standard output. This is performed after | |
|
109 | + the standard output is redirected by *--stdout*. Mutually exclusive with *--stderr*. | |
|
110 | + | |
|
111 | + *-c, --chdir=*'dir':: | |
|
112 | + Change directory to 'dir' before executing the program. This path must be | |
|
113 | + relative to the root of the sandbox. | |
|
114 | + | |
|
115 | + *-p, --processes*[*=*'max']:: | |
|
116 | + Permit the program to create up to 'max' processes and/or threads. Please | |
|
117 | + keep in mind that time and memory limit do not work with multiple processes | |
|
118 | + unless you enable the control group mode. If 'max' is not given, an arbitrary | |
|
119 | + number of processes can be run. By default, only one process is permitted. | |
|
120 | + | |
|
121 | + *--share-net*:: | |
|
122 | + By default, isolate creates a new network namespace for its child process. | |
|
123 | + This namespace contains no network devices except for a per-namespace loopback. | |
|
124 | + This prevents the program from communicating with the outside world. If you want | |
|
125 | + to permit communication, you can use this switch to keep the child process | |
|
126 | + in parent's network namespace. | |
|
127 | + | |
|
128 | + *--inherit-fds*:: | |
|
129 | + By default, isolate closes all file descriptors passed from its parent | |
|
130 | + except for descriptors 0, 1, and 2. | |
|
131 | + This prevents unintentional descriptor leaks. In some cases, passing extra | |
|
132 | + descriptors to the sandbox can be desirable, so you can use this switch | |
|
133 | + to make them survive. | |
|
134 | + | |
|
135 | + *-v, --verbose*:: | |
|
136 | + Tell the sandbox manager to be verbose and report on what is going on. | |
|
137 | + Using *-v* multiple times produces even more jabber. | |
|
138 | + | |
|
139 | + *-s, --silent*:: | |
|
140 | + Tell the sandbox manager to keep silence. No status messages are printed | |
|
141 | + to stderr except for fatal errors of the sandbox itself. The combination of | |
|
142 | + *--verbose* and *--silent* has an undefined effect. | |
|
143 | + | |
|
144 | + ENVIRONMENT RULES | |
|
145 | + ----------------- | |
|
146 | + UNIX processes normally inherit all environment variables from their parent. The | |
|
147 | + sandbox however passes only those variables which are explicitly requested by | |
|
148 | + environment rules: | |
|
149 | + | |
|
150 | + *-E, --env=*'var':: | |
|
151 | + Inherit the variable 'var' from the parent. | |
|
152 | + | |
|
153 | + *-E, --env=*'var'*=*'value':: | |
|
154 | + Set the variable 'var' to 'value'. When the 'value' is empty, the | |
|
155 | + variable is removed from the environment. | |
|
156 | + | |
|
157 | + *-e, --full-env*:: | |
|
158 | + Inherit all variables from the parent. | |
|
159 | + | |
|
160 | + The rules are applied in the order in which they were given, except for | |
|
161 | + *--full-env*, which is applied first. | |
|
162 | + | |
|
163 | + The list of rules is automatically initialized with *-ELIBC_FATAL_STDERR_=1*. | |
|
164 | + | |
|
165 | + DIRECTORY RULES | |
|
166 | + --------------- | |
|
167 | + The sandboxed process gets its own filesystem namespace, which contains only subtrees | |
|
168 | + requested by directory rules: | |
|
169 | + | |
|
170 | + *-d, --dir=*'in'*=*'out'[*:*'options']:: | |
|
171 | + Bind the directory 'out' as seen by the caller to the path 'in' inside the sandbox. | |
|
172 | + If there already was a directory rule for 'in', it is replaced. | |
|
173 | + | |
|
174 | + *-d, --dir=*'dir'[*:*'options']:: | |
|
175 | + Bind the directory +/+'dir' to 'dir' inside the sandbox. | |
|
176 | + If there already was a directory rule for 'dir', it is replaced. | |
|
177 | + | |
|
178 | + *-d, --dir=*'in'*=*:: | |
|
179 | + Remove a directory rule for the path 'in' inside the sandbox. | |
|
180 | + | |
|
181 | + By default, all directories are bound read-only and restricted (no devices, | |
|
182 | + no setuid binaries). This behavior can be modified using the 'options': | |
|
183 | + | |
|
184 | + *rw*:: | |
|
185 | + Allow read-write access. | |
|
186 | + | |
|
187 | + *dev*:: | |
|
188 | + Allow access to character and block devices. | |
|
189 | + | |
|
190 | + *noexec*:: | |
|
191 | + Disallow execution of binaries. | |
|
192 | + | |
|
193 | + *maybe*:: | |
|
194 | + Silently ignore the rule if the directory to be bound does not exist. | |
|
195 | + | |
|
196 | + *fs*:: | |
|
197 | + Instead of binding a directory, mount a device-less filesystem called 'in'. | |
|
198 | + For example, this can be 'proc' or 'sysfs'. | |
|
199 | + | |
|
200 | + Unless *--no-default-dirs* is specified, the default set of directory rules binds +/bin+, | |
|
201 | + +/dev+ (with devices allowed), +/lib+, +/lib64+ (if it exists), and +/usr+. It also binds | |
|
202 | + the working directory to +/box+ (read-write) and mounts the proc filesystem at +/proc+. | |
|
203 | + | |
|
204 | + *-D, --no-default-dirs*:: | |
|
205 | + Do not bind the default set of directories. Care has to be taken to specify | |
|
206 | + the correct set of rules (using *--dir*) for the executed program to run | |
|
207 | + correctly. In particular, +/box+ has to be bound. | |
|
208 | + | |
|
209 | + CONTROL GROUPS | |
|
210 | + -------------- | |
|
211 | + Isolate can make use of system control groups provided by the kernel | |
|
212 | + to constrain programs consisting of multiple processes. Please note | |
|
213 | + that this feature needs special system setup described in the INSTALLATION | |
|
214 | + section. | |
|
215 | + | |
|
216 | + *--cg*:: | |
|
217 | + Enable use of control groups. This should be specified with *--init*, | |
|
218 | + *--run* and *--cleanup*. | |
|
219 | + | |
|
220 | + *--cg-mem=*'size':: | |
|
221 | + Limit total memory usage by the whole control group to 'size' kilobytes. | |
|
222 | + This should be specified with *--run*. | |
|
223 | + | |
|
224 | + *--cg-timing*:: | |
|
225 | + Use control groups for timing, so that the *--time* switch affects the | |
|
226 | + total run time of all processes and threads in the control group. | |
|
227 | + This should be specified with *--run*. | |
|
228 | + This option is turned on by default, use *--no-cg-timing* to turn off. | |
|
229 | + | |
|
230 | + META-FILES | |
|
231 | + ---------- | |
|
232 | + The meta-file contains miscellaneous meta-information on execution of the | |
|
233 | + program within the sandbox. It is a textual file consisting of lines | |
|
234 | + of format 'key'*:*'value'. The following keys are defined: | |
|
235 | + | |
|
236 | + *cg-mem*:: | |
|
237 | + When control groups are enabled, this is the total memory use | |
|
238 | + by the whole control group (in kilobytes). | |
|
239 | + *cg-oom-killed*:: | |
|
240 | + Present when the program was killed by the out-of-memory killer | |
|
241 | + (e.g., because it has exceeded the memory limit of its control group). | |
|
242 | + This is reported only on Linux 4.13 and later. | |
|
243 | + *csw-forced*:: | |
|
244 | + Number of context switches forced by the kernel. | |
|
245 | + *csw-voluntary*:: | |
|
246 | + Number of context switches caused by the process giving up the CPU | |
|
247 | + voluntarily. | |
|
248 | + *exitcode*:: | |
|
249 | + The program has exited normally with this exit code. | |
|
250 | + *exitsig*:: | |
|
251 | + The program has exited after receiving this fatal signal. | |
|
252 | + *killed*:: | |
|
253 | + Present when the program was terminated by the sandbox | |
|
254 | + (e.g., because it has exceeded the time limit). | |
|
255 | + *max-rss*:: | |
|
256 | + Maximum resident set size of the process (in kilobytes). | |
|
257 | + *message*:: | |
|
258 | + Status message, not intended for machine processing. | |
|
259 | + E.g., "Time limit exceeded." | |
|
260 | + *status*:: | |
|
261 | + Two-letter status code: | |
|
262 | + * *RE* -- run-time error, i.e., exited with a non-zero exit code | |
|
263 | + * *SG* -- program died on a signal | |
|
264 | + * *TO* -- timed out | |
|
265 | + * *XX* -- internal error of the sandbox | |
|
266 | + *time*:: | |
|
267 | + Run time of the program in fractional seconds. | |
|
268 | + *time-wall*:: | |
|
269 | + Wall clock time of the program in fractional seconds. | |
|
270 | + | |
|
271 | + Please note that not all keys have to be present. | |
|
272 | + For example, neither *status* nor *message* is reported upon normal termination. | |
|
273 | + | |
|
274 | + RETURN VALUE | |
|
275 | + ------------ | |
|
276 | + When the program inside the sandbox finishes correctly, the sandbox returns 0. | |
|
277 | + If it finishes incorrectly, it returns 1. | |
|
278 | + All other return codes signal an internal error. | |
|
279 | + | |
|
280 | + INSTALLATION | |
|
281 | + ------------ | |
|
282 | + Isolate depends on several advanced features of the Linux kernel. Please | |
|
283 | + make sure that your kernel supports | |
|
284 | + PID namespaces (+CONFIG_PID_NS+), | |
|
285 | + IPC namespaces (+CONFIG_IPC_NS+), and | |
|
286 | + network namespaces (+CONFIG_NET_NS+). | |
|
287 | + If you want to use control groups, you need | |
|
288 | + the cpusets (+CONFIG_CPUSETS+), | |
|
289 | + CPU accounting controller (+CONFIG_CGROUP_CPUACCT+), and | |
|
290 | + memory resource controller (+CONFIG_MEMCG+). If your machine has swap enabled, | |
|
291 | + you should also enable the swap controller (+CONFIG_MEMCG_SWAP+). | |
|
292 | + | |
|
293 | + Debian 7.x and newer require enabling the memory and swap cgroup controllers by | |
|
294 | + adding the parameters "cgroup_enable=memory swapaccount=1" to the kernel | |
|
295 | + command-line, which can be set using +GRUB_CMDLINE_LINUX_DEFAULT+ in | |
|
296 | + /etc/default/grub. | |
|
297 | + | |
|
298 | + Isolate is designed to run setuid to root. The sub-process inside the sandbox | |
|
299 | + then switches to a non-privileged user ID (different for each *--box-id*). | |
|
300 | + The range of UIDs available and several filesystem paths are set in a configuration | |
|
301 | + file, by default located in /usr/local/etc/isolate. | |
|
302 | + | |
|
303 | + Before you run isolate with control groups, you need to ensure that the cgroup | |
|
304 | + filesystem is enabled and mounted. Most modern Linux distributions already | |
|
305 | + provide cgroup support through a tmpfs mounted at /sys/fs/cgroup, with | |
|
306 | + individual controllers mounted within subdirectories. | |
|
307 | + | |
|
308 | + REPRODUCIBILITY | |
|
309 | + --------------- | |
|
310 | + | |
|
311 | + The reproducibility of results can be improved by tuning some kernel | |
|
312 | + parameters, listed below. Some of these parameters can be checked using the | |
|
313 | + program isolate-check-environment. | |
|
314 | + | |
|
315 | + * Disable address space randomization: +sysctl kernel.randomize_va_space=0+. | |
|
316 | + Address space randomization can affect timing, memory usage, and program | |
|
317 | + behavior. This setting can be made persistent through /etc/sysctl.d/. | |
|
318 | + | |
|
319 | + * Disable dynamic CPU frequency scaling. This requires setting the cpufreq | |
|
320 | + scaling governor to +performance+. The process for doing this varies between | |
|
321 | + distributions. | |
|
322 | + | |
|
323 | + * Consider disabling Turboboost on CPUs that might support it (most i3/i5/i7 | |
|
324 | + Intel CPUs). Approach this one with caution. Disabling it on a CPU that Turboboosts | |
|
325 | + from 2.3 GHz to 2.6 GHz would have minimal impact on run-times in exchange | |
|
326 | + for determinism, but the same on a CPU that Turboboosts from 1.6 GHz to 2.8 | |
|
327 | + GHz will incur a much more dramatic slowdown. Perhaps if the ambient | |
|
328 | + temperature is controlled and only one single-threaded task is keeping the | |
|
329 | + CPU busy at 100%, then TB's behaviour may be reasonably deterministic; | |
|
330 | + requires further experimentation to confirm. | |
|
331 | + | |
|
332 | + * Run evaluations on a single CPU (core). The Linux scheduler has a tendency to randomly | |
|
333 | + migrate tasks between CPUs, incurring cache migration costs. You can use isolate's | |
|
334 | + configuration file to pin the process to a specified CPU. | |
|
335 | + | |
|
336 | + * Disable automatic kernel support for transparent huge pages. Both /sys/kernel/mm/transparent_hugepage/enabled | |
|
337 | + and /sys/kernel/mm/transparent_hugepage/defrag should be set to "madvise" or "never", and | |
|
338 | + /sys/kernel/mm/transparent_hugepage/khugepaged/defrag to 0. | |
|
339 | + | |
|
340 | + * Disable swapping. If you really need swap space and you are using cgroups, | |
|
341 | + make sure that you have the memsw controller enabled, so that swap space is | |
|
342 | + properly accounted for. | |
|
343 | + | |
|
344 | + LICENSE | |
|
345 | + ------- | |
|
346 | + Isolate was written by Martin Mares and Bernard Blackham. | |
|
347 | + It can be distributed and used under the terms of the GNU | |
|
348 | + General Public License version 2 or any later version. |
This diff has been collapsed as it changes many lines, (1122 lines changed) Show them Hide them | |||
@@ -0,0 +1,1122 | |||
|
1 | + /* | |
|
2 | + * A Process Isolator based on Linux Containers | |
|
3 | + * | |
|
4 | + * (c) 2012-2018 Martin Mares <mj@ucw.cz> | |
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | |
|
6 | + */ | |
|
7 | + | |
|
8 | + #include "isolate.h" | |
|
9 | + | |
|
10 | + #include <errno.h> | |
|
11 | + #include <fcntl.h> | |
|
12 | + #include <getopt.h> | |
|
13 | + #include <grp.h> | |
|
14 | + #include <sched.h> | |
|
15 | + #include <stdio.h> | |
|
16 | + #include <stdlib.h> | |
|
17 | + #include <string.h> | |
|
18 | + #include <sys/mount.h> | |
|
19 | + #include <sys/resource.h> | |
|
20 | + #include <sys/signal.h> | |
|
21 | + #include <sys/stat.h> | |
|
22 | + #include <sys/time.h> | |
|
23 | + #include <sys/vfs.h> | |
|
24 | + #include <sys/wait.h> | |
|
25 | + #include <time.h> | |
|
26 | + #include <unistd.h> | |
|
27 | + | |
|
28 | + /* May not be defined in older glibc headers */ | |
|
29 | + #ifndef MS_PRIVATE | |
|
30 | + #warning "Working around old glibc: no MS_PRIVATE" | |
|
31 | + #define MS_PRIVATE (1 << 18) | |
|
32 | + #endif | |
|
33 | + #ifndef MS_REC | |
|
34 | + #warning "Working around old glibc: no MS_REC" | |
|
35 | + #define MS_REC (1 << 14) | |
|
36 | + #endif | |
|
37 | + | |
|
38 | + /* | |
|
39 | + * Theory of operation | |
|
40 | + * | |
|
41 | + * Generally, we want to run a process inside a namespace/cgroup and watch it | |
|
42 | + * from the outside. However, the reality is a little bit more complicated as we | |
|
43 | + * do not want the inside process to become the init process of the PID namespace | |
|
44 | + * (we want to have all signals properly delivered). | |
|
45 | + * | |
|
46 | + * We are running three processes: | |
|
47 | + * | |
|
48 | + * - Keeper process (root privileges, parent namespace, parent cgroups) | |
|
49 | + * - Proxy process (UID/GID of the calling user, init process of the child | |
|
50 | + * namespace, parent cgroups) | |
|
51 | + * - Inside process (per-box UID/GID, child namespace, child cgroups) | |
|
52 | + * | |
|
53 | + * The proxy process just waits for the inside process to exit and then it passes | |
|
54 | + * the exit status to the keeper. | |
|
55 | + * | |
|
56 | + * We use two pipes: | |
|
57 | + * | |
|
58 | + * - Error pipe for error messages produced by the proxy process and the early | |
|
59 | + * stages of the inside process (until exec()). Listened to by the keeper. | |
|
60 | + * - Status pipe for passing the PID of the inside process and its exit status | |
|
61 | + * from the proxy to the keeper. | |
|
62 | + */ | |
|
63 | + | |
|
64 | + #define TIMER_INTERVAL_US 100000 | |
|
65 | + | |
|
66 | + static int timeout; /* milliseconds */ | |
|
67 | + static int wall_timeout; | |
|
68 | + static int extra_timeout; | |
|
69 | + int pass_environ; | |
|
70 | + int verbose; | |
|
71 | + static int silent; | |
|
72 | + static int fsize_limit; | |
|
73 | + static int memory_limit; | |
|
74 | + static int stack_limit; | |
|
75 | + int block_quota; | |
|
76 | + int inode_quota; | |
|
77 | + static int max_processes = 1; | |
|
78 | + static char *redir_stdin, *redir_stdout, *redir_stderr; | |
|
79 | + static int redir_stderr_to_stdout; | |
|
80 | + static char *set_cwd; | |
|
81 | + static int share_net; | |
|
82 | + static int inherit_fds; | |
|
83 | + static int default_dirs = 1; | |
|
84 | + | |
|
85 | + int cg_enable; | |
|
86 | + int cg_memory_limit; | |
|
87 | + int cg_timing = 1; | |
|
88 | + | |
|
89 | + int box_id; | |
|
90 | + static char box_dir[1024]; | |
|
91 | + static pid_t box_pid; | |
|
92 | + static pid_t proxy_pid; | |
|
93 | + | |
|
94 | + uid_t box_uid; | |
|
95 | + gid_t box_gid; | |
|
96 | + uid_t orig_uid; | |
|
97 | + gid_t orig_gid; | |
|
98 | + | |
|
99 | + static int partial_line; | |
|
100 | + static int cleanup_ownership; | |
|
101 | + | |
|
102 | + static struct timeval start_time; | |
|
103 | + static int ticks_per_sec; | |
|
104 | + static int total_ms, wall_ms; | |
|
105 | + static volatile sig_atomic_t timer_tick, interrupt; | |
|
106 | + | |
|
107 | + static int error_pipes[2]; | |
|
108 | + static int write_errors_to_fd; | |
|
109 | + static int read_errors_from_fd; | |
|
110 | + | |
|
111 | + static int status_pipes[2]; | |
|
112 | + | |
|
113 | + static int get_wall_time_ms(void); | |
|
114 | + static int get_run_time_ms(struct rusage *rus); | |
|
115 | + | |
|
116 | + /*** Messages and exits ***/ | |
|
117 | + | |
|
118 | + static void | |
|
119 | + final_stats(struct rusage *rus) | |
|
120 | + { | |
|
121 | + total_ms = get_run_time_ms(rus); | |
|
122 | + wall_ms = get_wall_time_ms(); | |
|
123 | + | |
|
124 | + meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000); | |
|
125 | + meta_printf("time-wall:%d.%03d\n", wall_ms/1000, wall_ms%1000); | |
|
126 | + meta_printf("max-rss:%ld\n", rus->ru_maxrss); | |
|
127 | + meta_printf("csw-voluntary:%ld\n", rus->ru_nvcsw); | |
|
128 | + meta_printf("csw-forced:%ld\n", rus->ru_nivcsw); | |
|
129 | + | |
|
130 | + cg_stats(); | |
|
131 | + } | |
|
132 | + | |
|
133 | + static void NONRET | |
|
134 | + box_exit(int rc) | |
|
135 | + { | |
|
136 | + if (proxy_pid > 0) | |
|
137 | + { | |
|
138 | + if (box_pid > 0) | |
|
139 | + { | |
|
140 | + kill(-box_pid, SIGKILL); | |
|
141 | + kill(box_pid, SIGKILL); | |
|
142 | + } | |
|
143 | + kill(-proxy_pid, SIGKILL); | |
|
144 | + kill(proxy_pid, SIGKILL); | |
|
145 | + meta_printf("killed:1\n"); | |
|
146 | + | |
|
147 | + struct rusage rus; | |
|
148 | + int p, stat; | |
|
149 | + do | |
|
150 | + p = wait4(proxy_pid, &stat, 0, &rus); | |
|
151 | + while (p < 0 && errno == EINTR); | |
|
152 | + if (p < 0) | |
|
153 | + fprintf(stderr, "UGH: Lost track of the process (%m)\n"); | |
|
154 | + else | |
|
155 | + final_stats(&rus); | |
|
156 | + } | |
|
157 | + | |
|
158 | + if (rc < 2 && cleanup_ownership) | |
|
159 | + chowntree("box", orig_uid, orig_gid); | |
|
160 | + | |
|
161 | + meta_close(); | |
|
162 | + exit(rc); | |
|
163 | + } | |
|
164 | + | |
|
165 | + static void | |
|
166 | + flush_line(void) | |
|
167 | + { | |
|
168 | + if (partial_line) | |
|
169 | + fputc('\n', stderr); | |
|
170 | + partial_line = 0; | |
|
171 | + } | |
|
172 | + | |
|
173 | + /* Report an error of the sandbox itself */ | |
|
174 | + void NONRET __attribute__((format(printf,1,2))) | |
|
175 | + die(char *msg, ...) | |
|
176 | + { | |
|
177 | + va_list args; | |
|
178 | + va_start(args, msg); | |
|
179 | + char buf[1024]; | |
|
180 | + int n = vsnprintf(buf, sizeof(buf), msg, args); | |
|
181 | + | |
|
182 | + // If the child processes are still running, show no mercy. | |
|
183 | + if (box_pid > 0) | |
|
184 | + { | |
|
185 | + kill(-box_pid, SIGKILL); | |
|
186 | + kill(box_pid, SIGKILL); | |
|
187 | + } | |
|
188 | + if (proxy_pid > 0) | |
|
189 | + { | |
|
190 | + kill(-proxy_pid, SIGKILL); | |
|
191 | + kill(proxy_pid, SIGKILL); | |
|
192 | + } | |
|
193 | + | |
|
194 | + if (write_errors_to_fd) | |
|
195 | + { | |
|
196 | + // We are inside the box, have to use error pipe for error reporting. | |
|
197 | + // We hope that the whole error message fits in PIPE_BUF bytes. | |
|
198 | + write(write_errors_to_fd, buf, n); | |
|
199 | + exit(2); | |
|
200 | + } | |
|
201 | + | |
|
202 | + // Otherwise, we in the box keeper process, so we report errors normally | |
|
203 | + flush_line(); | |
|
204 | + meta_printf("status:XX\nmessage:%s\n", buf); | |
|
205 | + fputs(buf, stderr); | |
|
206 | + fputc('\n', stderr); | |
|
207 | + box_exit(2); | |
|
208 | + } | |
|
209 | + | |
|
210 | + /* Report an error of the program inside the sandbox */ | |
|
211 | + void NONRET __attribute__((format(printf,1,2))) | |
|
212 | + err(char *msg, ...) | |
|
213 | + { | |
|
214 | + va_list args; | |
|
215 | + va_start(args, msg); | |
|
216 | + flush_line(); | |
|
217 | + if (msg[0] && msg[1] && msg[2] == ':' && msg[3] == ' ') | |
|
218 | + { | |
|
219 | + meta_printf("status:%c%c\n", msg[0], msg[1]); | |
|
220 | + msg += 4; | |
|
221 | + } | |
|
222 | + char buf[1024]; | |
|
223 | + vsnprintf(buf, sizeof(buf), msg, args); | |
|
224 | + meta_printf("message:%s\n", buf); | |
|
225 | + if (!silent) | |
|
226 | + { | |
|
227 | + fputs(buf, stderr); | |
|
228 | + fputc('\n', stderr); | |
|
229 | + } | |
|
230 | + box_exit(1); | |
|
231 | + } | |
|
232 | + | |
|
233 | + /* Write a message, but only if in verbose mode */ | |
|
234 | + void __attribute__((format(printf,1,2))) | |
|
235 | + msg(char *msg, ...) | |
|
236 | + { | |
|
237 | + va_list args; | |
|
238 | + va_start(args, msg); | |
|
239 | + if (verbose) | |
|
240 | + { | |
|
241 | + int len = strlen(msg); | |
|
242 | + if (len > 0) | |
|
243 | + partial_line = (msg[len-1] != '\n'); | |
|
244 | + vfprintf(stderr, msg, args); | |
|
245 | + fflush(stderr); | |
|
246 | + } | |
|
247 | + va_end(args); | |
|
248 | + } | |
|
249 | + | |
|
250 | + /*** Signal handling in keeper process ***/ | |
|
251 | + | |
|
252 | + /* | |
|
253 | + * Signal handling is tricky. We must set up signal handlers before | |
|
254 | + * we start the child process (and reset them in the child process). | |
|
255 | + * Otherwise, there is a short time window where a SIGINT can kill | |
|
256 | + * us and leave the child process running. | |
|
257 | + */ | |
|
258 | + | |
|
/* How the keeper process reacts to one particular signal */
struct signal_rule {
  int signum;
  enum {
    SIGNAL_IGNORE,              // Set to SIG_IGN
    SIGNAL_INTERRUPT,           // Handled by signal_int()
    SIGNAL_FATAL,               // Handled by signal_fatal()
  } action;
};

/* Signals touched by setup_signals() and restored by reset_signals() */
static const struct signal_rule signal_rules[] = {
  { .signum = SIGHUP,  .action = SIGNAL_INTERRUPT },
  { .signum = SIGINT,  .action = SIGNAL_INTERRUPT },
  { .signum = SIGQUIT, .action = SIGNAL_INTERRUPT },
  { .signum = SIGILL,  .action = SIGNAL_FATAL },
  { .signum = SIGABRT, .action = SIGNAL_FATAL },
  { .signum = SIGFPE,  .action = SIGNAL_FATAL },
  { .signum = SIGSEGV, .action = SIGNAL_FATAL },
  { .signum = SIGPIPE, .action = SIGNAL_IGNORE },
  { .signum = SIGTERM, .action = SIGNAL_INTERRUPT },
  { .signum = SIGUSR1, .action = SIGNAL_IGNORE },
  { .signum = SIGUSR2, .action = SIGNAL_IGNORE },
  { .signum = SIGBUS,  .action = SIGNAL_FATAL },
};
|
278 | + | |
|
279 | + static void | |
|
280 | + signal_alarm(int unused UNUSED) | |
|
281 | + { | |
|
282 | + /* Time limit checks are synchronous, so we only schedule them there. */ | |
|
283 | + timer_tick = 1; | |
|
284 | + msg("[timer]"); | |
|
285 | + } | |
|
286 | + | |
|
287 | + static void | |
|
288 | + signal_int(int signum) | |
|
289 | + { | |
|
290 | + /* Interrupts (e.g., SIGINT) are synchronous, too. */ | |
|
291 | + interrupt = signum; | |
|
292 | + } | |
|
293 | + | |
|
/*
 * Handler for SIGSEGV and similar signals hitting the keeper itself:
 * try to die gracefully via die(), which also kills the child processes.
 */
static void
signal_fatal(int signum)
{
  die("Sandbox keeper received fatal signal %d", signum);
}
|
300 | + | |
|
301 | + static void | |
|
302 | + setup_signals(void) | |
|
303 | + { | |
|
304 | + struct sigaction sa_int, sa_fatal; | |
|
305 | + bzero(&sa_int, sizeof(sa_int)); | |
|
306 | + sa_int.sa_handler = signal_int; | |
|
307 | + bzero(&sa_fatal, sizeof(sa_fatal)); | |
|
308 | + sa_fatal.sa_handler = signal_fatal; | |
|
309 | + | |
|
310 | + for (int i=0; i < ARRAY_SIZE(signal_rules); i++) | |
|
311 | + { | |
|
312 | + const struct signal_rule *sr = &signal_rules[i]; | |
|
313 | + switch (sr->action) | |
|
314 | + { | |
|
315 | + case SIGNAL_IGNORE: | |
|
316 | + signal(sr->signum, SIG_IGN); | |
|
317 | + break; | |
|
318 | + case SIGNAL_INTERRUPT: | |
|
319 | + sigaction(sr->signum, &sa_int, NULL); | |
|
320 | + break; | |
|
321 | + case SIGNAL_FATAL: | |
|
322 | + sigaction(sr->signum, &sa_fatal, NULL); | |
|
323 | + break; | |
|
324 | + default: | |
|
325 | + die("Invalid signal rule"); | |
|
326 | + } | |
|
327 | + } | |
|
328 | + } | |
|
329 | + | |
|
330 | + static void | |
|
331 | + reset_signals(void) | |
|
332 | + { | |
|
333 | + for (int i=0; i < ARRAY_SIZE(signal_rules); i++) | |
|
334 | + signal(signal_rules[i].signum, SIG_DFL); | |
|
335 | + } | |
|
336 | + | |
|
337 | + /*** The keeper process ***/ | |
|
338 | + | |
|
339 | + #define PROC_BUF_SIZE 4096 | |
|
340 | + static int | |
|
341 | + read_proc_file(char *buf, char *name, int *fdp) | |
|
342 | + { | |
|
343 | + int c; | |
|
344 | + | |
|
345 | + if (*fdp < 0) | |
|
346 | + { | |
|
347 | + snprintf(buf, PROC_BUF_SIZE, "/proc/%d/%s", (int) box_pid, name); | |
|
348 | + *fdp = open(buf, O_RDONLY); | |
|
349 | + if (*fdp < 0) | |
|
350 | + return 0; // This is OK, the process could have finished | |
|
351 | + } | |
|
352 | + lseek(*fdp, 0, SEEK_SET); | |
|
353 | + if ((c = read(*fdp, buf, PROC_BUF_SIZE-1)) < 0) | |
|
354 | + { | |
|
355 | + // Even this could fail if the process disappeared since open() | |
|
356 | + return 0; | |
|
357 | + } | |
|
358 | + if (c >= PROC_BUF_SIZE-1) | |
|
359 | + die("/proc/$pid/%s too long", name); | |
|
360 | + buf[c] = 0; | |
|
361 | + return 1; | |
|
362 | + } | |
|
363 | + | |
|
364 | + static int | |
|
365 | + get_wall_time_ms(void) | |
|
366 | + { | |
|
367 | + struct timeval now, wall; | |
|
368 | + gettimeofday(&now, NULL); | |
|
369 | + timersub(&now, &start_time, &wall); | |
|
370 | + return wall.tv_sec*1000 + wall.tv_usec/1000; | |
|
371 | + } | |
|
372 | + | |
|
373 | + static int | |
|
374 | + get_run_time_ms(struct rusage *rus) | |
|
375 | + { | |
|
376 | + if (cg_enable && cg_timing) | |
|
377 | + return cg_get_run_time_ms(); | |
|
378 | + | |
|
379 | + if (rus) | |
|
380 | + { | |
|
381 | + struct timeval total; | |
|
382 | + timeradd(&rus->ru_utime, &rus->ru_stime, &total); | |
|
383 | + return total.tv_sec*1000 + total.tv_usec/1000; | |
|
384 | + } | |
|
385 | + | |
|
386 | + // It might happen that we do not know the box_pid (see comments in find_box_pid()) | |
|
387 | + if (!box_pid) | |
|
388 | + return 0; | |
|
389 | + | |
|
390 | + char buf[PROC_BUF_SIZE], *x; | |
|
391 | + int utime, stime; | |
|
392 | + static int proc_stat_fd = -1; | |
|
393 | + | |
|
394 | + if (!read_proc_file(buf, "stat", &proc_stat_fd)) | |
|
395 | + return 0; | |
|
396 | + x = buf; | |
|
397 | + while (*x && *x != ' ') | |
|
398 | + x++; | |
|
399 | + while (*x == ' ') | |
|
400 | + x++; | |
|
401 | + if (*x++ != '(') | |
|
402 | + die("proc stat syntax error 1"); | |
|
403 | + while (*x && (*x != ')' || x[1] != ' ')) | |
|
404 | + x++; | |
|
405 | + while (*x == ')' || *x == ' ') | |
|
406 | + x++; | |
|
407 | + if (sscanf(x, "%*c %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %d %d", &utime, &stime) != 2) | |
|
408 | + die("proc stat syntax error 2"); | |
|
409 | + | |
|
410 | + return (utime + stime) * 1000 / ticks_per_sec; | |
|
411 | + } | |
|
412 | + | |
|
413 | + static void | |
|
414 | + check_timeout(void) | |
|
415 | + { | |
|
416 | + if (wall_timeout) | |
|
417 | + { | |
|
418 | + int wall_ms = get_wall_time_ms(); | |
|
419 | + if (wall_ms > wall_timeout) | |
|
420 | + err("TO: Time limit exceeded (wall clock)"); | |
|
421 | + if (verbose > 1) | |
|
422 | + fprintf(stderr, "[wall time check: %d msec]\n", wall_ms); | |
|
423 | + } | |
|
424 | + if (timeout) | |
|
425 | + { | |
|
426 | + int ms = get_run_time_ms(NULL); | |
|
427 | + if (verbose > 1) | |
|
428 | + fprintf(stderr, "[time check: %d msec]\n", ms); | |
|
429 | + if (ms > timeout && ms > extra_timeout) | |
|
430 | + err("TO: Time limit exceeded"); | |
|
431 | + } | |
|
432 | + } | |
|
433 | + | |
|
/*
 * The keeper: waits for the proxy process to finish, enforces the time
 * limits while waiting, and finally reports how the sandboxed program ended.
 *
 * It keeps the read ends of error_pipes and status_pipes and closes the
 * write ends. When timeout or wall_timeout is set, an interval timer
 * delivers SIGALRM every TIMER_INTERVAL_US microseconds; this interrupts
 * wait4() so that the limits can be re-checked.
 *
 * Once wait4() has returned the proxy's PID, every path below ends in
 * err() or die() (both declared noreturn in isolate.h) or in box_exit(),
 * so the loop repeats only through the EINTR `continue'.
 */
static void
box_keeper(void)
{
  read_errors_from_fd = error_pipes[0];
  close(error_pipes[1]);
  close(status_pipes[1]);

  gettimeofday(&start_time, NULL);
  ticks_per_sec = sysconf(_SC_CLK_TCK);
  if (ticks_per_sec <= 0)
    die("Invalid ticks_per_sec!");

  if (timeout || wall_timeout)
    {
      // Periodic SIGALRM; the handler is installed without SA_RESTART (sa is zeroed)
      struct sigaction sa;
      bzero(&sa, sizeof(sa));
      sa.sa_handler = signal_alarm;
      sigaction(SIGALRM, &sa, NULL);
      struct itimerval timer = {
	.it_interval = { .tv_usec = TIMER_INTERVAL_US },
	.it_value = { .tv_usec = TIMER_INTERVAL_US },
      };
      setitimer(ITIMER_REAL, &timer, NULL);
    }

  for(;;)
    {
      struct rusage rus;
      int stat;
      pid_t p;
      // `interrupt' and `timer_tick' are presumably set by the signal handlers -- TODO confirm
      if (interrupt)
	{
	  meta_printf("exitsig:%d\n", interrupt);
	  err("SG: Interrupted");
	}
      if (timer_tick)
	{
	  check_timeout();
	  timer_tick = 0;
	}
      p = wait4(proxy_pid, &stat, 0, &rus);
      if (p < 0)
	{
	  // Interrupted by a signal: go back and re-check the flags above
	  if (errno == EINTR)
	    continue;
	  die("wait4: %m");
	}
      if (p != proxy_pid)
	die("wait4: unknown pid %d exited!", p);
      proxy_pid = 0;

      // Check error pipe if there is an internal error passed from inside the box
      char interr[1024];
      int n = read(read_errors_from_fd, interr, sizeof(interr) - 1);
      if (n > 0)
	{
	  interr[n] = 0;
	  die("%s", interr);
	}

      // Check status pipe if there is an exit status reported by the proxy process
      // (this overwrites `stat' with the status the proxy sent for the inside process)
      n = read(status_pipes[0], &stat, sizeof(stat));
      if (n != sizeof(stat))
	die("Did not receive exit status from proxy");

      // NOTE(review): final_stats() presumably fills in total_ms and wall_ms used below -- confirm
      final_stats(&rus);
      if (timeout && total_ms > timeout)
	err("TO: Time limit exceeded");
      if (wall_timeout && wall_ms > wall_timeout)
	err("TO: Time limit exceeded (wall clock)");

      if (WIFEXITED(stat))
	{
	  meta_printf("exitcode:%d\n", WEXITSTATUS(stat));
	  if (WEXITSTATUS(stat))
	    err("RE: Exited with error status %d", WEXITSTATUS(stat));
	  flush_line();
	  if (!silent)
	    {
	      fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall)\n",
		total_ms/1000, total_ms%1000,
		wall_ms/1000, wall_ms%1000);
	    }
	  box_exit(0);
	}
      else if (WIFSIGNALED(stat))
	{
	  meta_printf("exitsig:%d\n", WTERMSIG(stat));
	  err("SG: Caught fatal signal %d", WTERMSIG(stat));
	}
      else if (WIFSTOPPED(stat))
	{
	  meta_printf("exitsig:%d\n", WSTOPSIG(stat));
	  err("SG: Stopped by signal %d", WSTOPSIG(stat));
	}
      else
	die("wait4: unknown status %x, giving up!", stat);
    }
}
|
533 | + | |
|
534 | + /*** The process running inside the box ***/ | |
|
535 | + | |
|
/*
 * Build the root file system of the sandbox and enter it.
 *
 * Creates the directory "root" (relative to the current directory), mounts
 * a tmpfs on it, applies the directory rules, chroots into it and finally
 * changes the working directory. Dies on any failure.
 */
static void
setup_root(void)
{
  if (mkdir("root", 0750) < 0 && errno != EEXIST)
    die("mkdir('root'): %m");

  /*
   * Ensure all mounts are private, not shared. We don't want our mounts
   * appearing outside of our namespace.
   * (systemd since version 188 mounts filesystems shared by default).
   */
  if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
    die("Cannot privatize mounts: %m");

  if (mount("none", "root", "tmpfs", 0, "mode=755") < 0)
    die("Cannot mount root ramdisk: %m");

  apply_dir_rules(default_dirs);

  if (chroot("root") < 0)
    die("Chroot failed: %m");

  // NOTE(review): chroot() does not change the working directory, so this
  // relative path is presumably still resolved from the pre-chroot directory
  // (i.e. it names the "box" directory inside the new root) -- confirm
  // before "simplifying" it to an absolute path.
  if (chdir("root/box") < 0)
    die("Cannot change current directory: %m");
}
|
561 | + | |
|
/*
 * Switch to the sandbox identity: set the real, effective and saved group ID
 * to box_gid, drop all supplementary groups, then set the real, effective
 * and saved user ID to box_uid. The user ID is changed last. Dies on failure.
 * Finally the process is put into its own process group.
 */
static void
setup_credentials(void)
{
  if (setresgid(box_gid, box_gid, box_gid) < 0)
    die("setresgid: %m");
  if (setgroups(0, NULL) < 0)
    die("setgroups: %m");
  if (setresuid(box_uid, box_uid, box_uid) < 0)
    die("setresuid: %m");
  setpgrp();
}
|
573 | + | |
|
574 | + static void | |
|
575 | + setup_fds(void) | |
|
576 | + { | |
|
577 | + if (redir_stdin) | |
|
578 | + { | |
|
579 | + close(0); | |
|
580 | + if (open(redir_stdin, O_RDONLY) != 0) | |
|
581 | + die("open(\"%s\"): %m", redir_stdin); | |
|
582 | + } | |
|
583 | + if (redir_stdout) | |
|
584 | + { | |
|
585 | + close(1); | |
|
586 | + if (open(redir_stdout, O_WRONLY | O_CREAT | O_TRUNC, 0666) != 1) | |
|
587 | + die("open(\"%s\"): %m", redir_stdout); | |
|
588 | + } | |
|
589 | + if (redir_stderr) | |
|
590 | + { | |
|
591 | + close(2); | |
|
592 | + if (open(redir_stderr, O_WRONLY | O_CREAT | O_TRUNC, 0666) != 2) | |
|
593 | + die("open(\"%s\"): %m", redir_stderr); | |
|
594 | + } | |
|
595 | + if (redir_stderr_to_stdout) | |
|
596 | + { | |
|
597 | + if (dup2(1, 2) < 0) | |
|
598 | + die("Cannot dup stdout to stderr: %m"); | |
|
599 | + } | |
|
600 | + } | |
|
601 | + | |
|
/*
 * Set both the soft and the hard limit of resource `res' to `limit'.
 * res_name is the symbolic name of the resource and is used only in the
 * error message. Dies, reporting the system error, when setrlimit() fails.
 */
static void
setup_rlim(const char *res_name, int res, rlim_t limit)
{
  struct rlimit rl = { .rlim_cur = limit, .rlim_max = limit };
  if (setrlimit(res, &rl) < 0)
    die("setrlimit(%s, %jd): %m", res_name, (intmax_t) limit);
}
|
609 | + | |
|
610 | + static void | |
|
611 | + setup_rlimits(void) | |
|
612 | + { | |
|
613 | + #define RLIM(res, val) setup_rlim("RLIMIT_" #res, RLIMIT_##res, val) | |
|
614 | + | |
|
615 | + if (memory_limit) | |
|
616 | + RLIM(AS, (rlim_t)memory_limit * 1024); | |
|
617 | + | |
|
618 | + if (fsize_limit) | |
|
619 | + RLIM(FSIZE, (rlim_t)fsize_limit * 1024); | |
|
620 | + | |
|
621 | + RLIM(STACK, (stack_limit ? (rlim_t)stack_limit * 1024 : RLIM_INFINITY)); | |
|
622 | + RLIM(NOFILE, 64); | |
|
623 | + RLIM(MEMLOCK, 0); | |
|
624 | + | |
|
625 | + if (max_processes) | |
|
626 | + RLIM(NPROC, max_processes); | |
|
627 | + | |
|
628 | + #undef RLIM | |
|
629 | + } | |
|
630 | + | |
|
/*
 * Body of the process that becomes the sandboxed program.
 *
 * Enters the control group, sets up the root file system, resource limits,
 * credentials and file descriptors, builds the environment, optionally
 * changes the working directory to set_cwd and executes args[0].
 * Never returns normally: either execve() succeeds or die() is called
 * (die() is declared noreturn in isolate.h).
 */
static int
box_inside(char **args)
{
  cg_enter();
  setup_root();
  setup_rlimits();
  setup_credentials();
  setup_fds();
  char **env = setup_environment();

  if (set_cwd && chdir(set_cwd))
    die("chdir: %m");

  execve(args[0], args, env);
  die("execve(\"%s\"): %m", args[0]);
}
|
647 | + | |
|
648 | + /*** Proxy ***/ | |
|
649 | + | |
|
/*
 * Switch to the identity recorded in orig_uid/orig_gid: set all three group
 * IDs, drop the supplementary groups, then set all three user IDs.
 * The user ID is changed last. Dies on failure.
 */
static void
setup_orig_credentials(void)
{
  if (setresgid(orig_gid, orig_gid, orig_gid) < 0)
    die("setresgid: %m");
  if (setgroups(0, NULL) < 0)
    die("setgroups: %m");
  if (setresuid(orig_uid, orig_uid, orig_uid) < 0)
    die("setresuid: %m");
}
|
660 | + | |
|
/*
 * Body of the proxy process created by clone() in run().
 *
 * The proxy forks the process that will run inside the box, then reports
 * two values through status_pipes[1]: first the PID of that child as seen
 * by the proxy, later the wait status of the child. Internal errors are
 * written to error_pipes[1].
 *
 * arg is the argument vector of the command to run.
 */
static int
box_proxy(void *arg)
{
  char **args = arg;

  // Keep only the write ends of both pipes
  write_errors_to_fd = error_pipes[1];
  close(error_pipes[0]);
  close(status_pipes[0]);
  meta_close();
  reset_signals();

  pid_t inside_pid = fork();
  if (inside_pid < 0)
    die("Cannot run process, fork failed: %m");
  else if (!inside_pid)
    {
      // Child: the status pipe is used by the proxy only
      close(status_pipes[1]);
      box_inside(args);
      _exit(42);	// We should never get here
    }

  // The proxy itself continues with the original (invoking) credentials
  setup_orig_credentials();
  if (write(status_pipes[1], &inside_pid, sizeof(inside_pid)) != sizeof(inside_pid))
    die("Proxy write to pipe failed: %m");

  int stat;
  pid_t p = waitpid(inside_pid, &stat, 0);
  if (p < 0)
    die("Proxy waitpid() failed: %m");

  // Forward the raw wait status; the keeper decodes it
  if (write(status_pipes[1], &stat, sizeof(stat)) != sizeof(stat))
    die("Proxy write to pipe failed: %m");

  _exit(0);
}
|
696 | + | |
|
697 | + static void | |
|
698 | + box_init(void) | |
|
699 | + { | |
|
700 | + if (box_id < 0 || box_id >= cf_num_boxes) | |
|
701 | + die("Sandbox ID out of range (allowed: 0-%d)", cf_num_boxes-1); | |
|
702 | + box_uid = cf_first_uid + box_id; | |
|
703 | + box_gid = cf_first_gid + box_id; | |
|
704 | + | |
|
705 | + snprintf(box_dir, sizeof(box_dir), "%s/%d", cf_box_root, box_id); | |
|
706 | + make_dir(box_dir); | |
|
707 | + if (chdir(box_dir) < 0) | |
|
708 | + die("chdir(%s): %m", box_dir); | |
|
709 | + } | |
|
710 | + | |
|
711 | + /*** Commands ***/ | |
|
712 | + | |
|
713 | + static const char * | |
|
714 | + self_name(void) | |
|
715 | + { | |
|
716 | + return cg_enable ? "isolate --cg" : "isolate"; | |
|
717 | + } | |
|
718 | + | |
|
719 | + static void | |
|
720 | + init(void) | |
|
721 | + { | |
|
722 | + msg("Preparing sandbox directory\n"); | |
|
723 | + if (mkdir("box", 0700) < 0) | |
|
724 | + { | |
|
725 | + if (errno == EEXIST) | |
|
726 | + die("Box already exists, run `%s --cleanup' first", self_name()); | |
|
727 | + else | |
|
728 | + die("Cannot create box: %m"); | |
|
729 | + } | |
|
730 | + if (chown("box", orig_uid, orig_gid) < 0) | |
|
731 | + die("Cannot chown box: %m"); | |
|
732 | + | |
|
733 | + cg_prepare(); | |
|
734 | + set_quota(); | |
|
735 | + | |
|
736 | + puts(box_dir); | |
|
737 | + } | |
|
738 | + | |
|
739 | + static void | |
|
740 | + cleanup(void) | |
|
741 | + { | |
|
742 | + if (!dir_exists("box")) | |
|
743 | + { | |
|
744 | + msg("Nothing to do -- box directory did not exist\n"); | |
|
745 | + return; | |
|
746 | + } | |
|
747 | + | |
|
748 | + msg("Deleting sandbox directory\n"); | |
|
749 | + rmtree(box_dir); | |
|
750 | + cg_remove(); | |
|
751 | + } | |
|
752 | + | |
|
/*
 * Create a pipe in fds[0..1]. Both ends are marked close-on-exec;
 * when nonblocking is non-zero they are also switched to non-blocking mode.
 * Dies on failure.
 */
static void
setup_pipe(int *fds, int nonblocking)
{
  if (pipe(fds) < 0)
    die("pipe: %m");

  for (int end = 0; end < 2; end++)
    {
      int fd = fds[end];

      if (fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC) < 0)
	die("fcntl on pipe: %m");

      if (!nonblocking)
	continue;

      if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) < 0)
	die("fcntl on pipe: %m");
    }
}
|
763 | + | |
|
764 | + static void | |
|
765 | + find_box_pid(void) | |
|
766 | + { | |
|
767 | + /* | |
|
768 | + * The box keeper process wants to poll status of the inside process, | |
|
769 | + * so it needs to know the box_pid. However, it is not easy to obtain: | |
|
770 | + * we got the PID from the proxy, but it is local to the PID namespace. | |
|
771 | + * Instead, we ask /proc to enumerate the children of the proxy. | |
|
772 | + * | |
|
773 | + * CAVEAT: The timing is tricky. We know that the inside process was | |
|
774 | + * already started (passing the PID from the proxy to us guarantees it), | |
|
775 | + * but it might already have exited and be reaped by the proxy. Therefore | |
|
776 | + * it is correct if we fail to find anything. | |
|
777 | + */ | |
|
778 | + | |
|
779 | + char namebuf[256]; | |
|
780 | + snprintf(namebuf, sizeof(namebuf), "/proc/%d/task/%d/children", (int) proxy_pid, (int) proxy_pid); | |
|
781 | + FILE *f = fopen(namebuf, "r"); | |
|
782 | + if (!f) | |
|
783 | + return; | |
|
784 | + | |
|
785 | + int child; | |
|
786 | + if (fscanf(f, "%d", &child) != 1) | |
|
787 | + { | |
|
788 | + fclose(f); | |
|
789 | + return; | |
|
790 | + } | |
|
791 | + box_pid = child; | |
|
792 | + | |
|
793 | + if (fscanf(f, "%d", &child) == 1) | |
|
794 | + die("Error parsing %s: unexpected children found", namebuf); | |
|
795 | + | |
|
796 | + fclose(f); | |
|
797 | + } | |
|
798 | + | |
|
/*
 * The --run command: start the proxy in new namespaces and act as its keeper.
 *
 * argv is the command to execute inside the sandbox (argv[0] is the program).
 * Requires the "box" directory to exist. Ownership of "box" is handed to the
 * sandbox user before the program starts. The call ends in box_keeper().
 */
static void
run(char **argv)
{
  if (!dir_exists("box"))
    die("Box directory not found, did you run `%s --init'?", self_name());

  if (!inherit_fds)
    close_all_fds();

  chowntree("box", box_uid, box_gid);
  // NOTE(review): presumably tells the exit path to chown the box back -- confirm
  cleanup_ownership = 1;

  // The error pipe is non-blocking, the status pipe is blocking
  setup_pipe(error_pipes, 1);
  setup_pipe(status_pipes, 0);
  setup_signals();

  // NOTE(review): argv is passed as the child's stack pointer as well as its
  // argument. CLONE_VM is not among the flags, so the child presumably works
  // on its own copy of the memory -- confirm before changing this call.
  proxy_pid = clone(
    box_proxy,			// Function to execute as the body of the new process
    argv,			// Pass our stack
    SIGCHLD | CLONE_NEWIPC | (share_net ? 0 : CLONE_NEWNET) | CLONE_NEWNS | CLONE_NEWPID,
    argv);			// Pass the arguments
  if (proxy_pid < 0)
    die("Cannot run proxy, clone failed: %m");
  if (!proxy_pid)
    die("Cannot run proxy, clone returned 0");

  // The first message on the status pipe is the PID of the inside process
  // as seen from within the new PID namespace; it is used for logging only.
  pid_t box_pid_inside_ns;
  int n = read(status_pipes[0], &box_pid_inside_ns, sizeof(box_pid_inside_ns));
  if (n != sizeof(box_pid_inside_ns))
    die("Proxy failed before it passed box_pid: %m");
  find_box_pid();
  msg("Started proxy_pid=%d box_pid=%d box_pid_inside_ns=%d\n", (int) proxy_pid, (int) box_pid, (int) box_pid_inside_ns);

  box_keeper();
}
|
834 | + | |
|
/*
 * The --version command: print the program version, the copyright line and
 * the build identification to standard output. VERSION, YEAR, BUILD_DATE
 * and BUILD_COMMIT must be string-literal macros, since they are
 * concatenated into the format strings.
 */
static void
show_version(void)
{
  printf("The process isolator " VERSION "\n");
  printf("(c) 2012--" YEAR " Martin Mares and Bernard Blackham\n");
  printf("Built on " BUILD_DATE " from Git commit " BUILD_COMMIT "\n");
}
|
842 | + | |
|
843 | + /*** Options ***/ | |
|
844 | + | |
|
/*
 * Print an optional error message (printf-style, to stderr) followed by the
 * usage summary (to stdout), then exit with status 2.
 * Pass NULL as msg to print the summary only.
 */
static void __attribute__((format(printf,1,2)))
usage(const char *msg, ...)
{
  if (msg != NULL)
    {
      va_list args;
      va_start(args, msg);
      vfprintf(stderr, msg, args);
      va_end(args);
    }
  // The text is one string literal; continuation lines must start in column 0
  printf("\
Usage: isolate [<options>] <command>\n\
\n\
Options:\n\
-b, --box-id=<id>\tWhen multiple sandboxes are used in parallel, each must get a unique ID\n\
--cg\t\tEnable use of control groups\n\
--cg-mem=<size>\tLimit memory usage of the control group to <size> KB\n\
--cg-timing\t\tTime limits affects total run time of the control group\n\
\t\t\t(this is turned on by default, use --no-cg-timing to turn off)\n\
-c, --chdir=<dir>\tChange directory to <dir> before executing the program\n\
-d, --dir=<dir>\t\tMake a directory <dir> visible inside the sandbox\n\
--dir=<in>=<out>\tMake a directory <out> outside visible as <in> inside\n\
--dir=<in>=\t\tDelete a previously defined directory rule (even a default one)\n\
--dir=...:<opt>\tSpecify options for a rule:\n\
\t\t\t\tdev\tAllow access to special files\n\
\t\t\t\tfs\tMount a filesystem (e.g., --dir=/proc:proc:fs)\n\
\t\t\t\tmaybe\tSkip the rule if <out> does not exist\n\
\t\t\t\tnoexec\tDo not allow execution of binaries\n\
\t\t\t\trw\tAllow read-write access\n\
-D, --no-default-dirs\tDo not add default directory rules\n\
-f, --fsize=<size>\tMax size (in KB) of files that can be created\n\
-E, --env=<var>\t\tInherit the environment variable <var> from the parent process\n\
-E, --env=<var>=<val>\tSet the environment variable <var> to <val>; unset it if <var> is empty\n\
-x, --extra-time=<time>\tSet extra timeout, before which a timing-out program is not yet killed,\n\
\t\t\tso that its real execution time is reported (seconds, fractions allowed)\n\
-e, --full-env\t\tInherit full environment of the parent process\n\
--inherit-fds\t\tInherit all file descriptors of the parent process\n\
-m, --mem=<size>\tLimit address space to <size> KB\n\
-M, --meta=<file>\tOutput process information to <file> (name:value)\n\
-q, --quota=<blk>,<ino>\tSet disk quota to <blk> blocks and <ino> inodes\n\
--share-net\t\tShare network namespace with the parent process\n\
-s, --silent\t\tDo not print status messages except for fatal errors\n\
-k, --stack=<size>\tLimit stack size to <size> KB (default: 0=unlimited)\n\
-r, --stderr=<file>\tRedirect stderr to <file>\n\
--stderr-to-stdout\tRedirect stderr to stdout\n\
-i, --stdin=<file>\tRedirect stdin from <file>\n\
-o, --stdout=<file>\tRedirect stdout to <file>\n\
-p, --processes[=<max>]\tEnable multiple processes (at most <max> of them); needs --cg\n\
-t, --time=<time>\tSet run time limit (seconds, fractions allowed)\n\
-v, --verbose\t\tBe verbose (use multiple times for even more verbosity)\n\
-w, --wall-time=<time>\tSet wall clock time limit (seconds, fractions allowed)\n\
\n\
Commands:\n\
--init\t\tInitialize sandbox (and its control group when --cg is used)\n\
--run -- <cmd> ...\tRun given command within sandbox\n\
--cleanup\t\tClean up sandbox\n\
--version\t\tDisplay program version and configuration\n\
");
  exit(2);
}
|
905 | + | |
|
/*
 * Codes returned by getopt_long() for options that have no single-letter
 * form. Numbering starts at 256, presumably so that the codes cannot
 * collide with the character codes of the short options.
 * The first four entries are the commands handled via `mode' in main().
 */
enum opt_code {
  OPT_INIT = 256,
  OPT_RUN,
  OPT_CLEANUP,
  OPT_VERSION,
  OPT_CG,
  OPT_CG_MEM,
  OPT_CG_TIMING,
  OPT_NO_CG_TIMING,
  OPT_SHARE_NET,
  OPT_INHERIT_FDS,
  OPT_STDERR_TO_STDOUT,
};
|
919 | + | |
|
/* Short options in getopt() syntax: ':' = argument required, '::' = argument optional. */
static const char short_opts[] = "b:c:d:DeE:f:i:k:m:M:o:p::q:r:st:vw:x:";

/*
 * Long options for getopt_long(). The second field is the has_arg value:
 * 0 = no argument, 1 = required argument, 2 = optional argument.
 * The last field is the code returned to main(): either the letter of the
 * equivalent short option or a value of enum opt_code.
 */
static const struct option long_opts[] = {
  { "box-id",		1, NULL, 'b' },
  { "chdir",		1, NULL, 'c' },
  { "cg",		0, NULL, OPT_CG },
  { "cg-mem",		1, NULL, OPT_CG_MEM },
  { "cg-timing",	0, NULL, OPT_CG_TIMING },
  { "cleanup",		0, NULL, OPT_CLEANUP },
  { "dir",		1, NULL, 'd' },
  { "no-cg-timing",	0, NULL, OPT_NO_CG_TIMING },
  { "no-default-dirs",  0, NULL, 'D' },
  { "fsize",		1, NULL, 'f' },
  { "env",		1, NULL, 'E' },
  { "extra-time",	1, NULL, 'x' },
  { "full-env",		0, NULL, 'e' },
  { "inherit-fds",	0, NULL, OPT_INHERIT_FDS },
  { "init",		0, NULL, OPT_INIT },
  { "mem",		1, NULL, 'm' },
  { "meta",		1, NULL, 'M' },
  { "processes",	2, NULL, 'p' },
  { "quota",		1, NULL, 'q' },
  { "run",		0, NULL, OPT_RUN },
  { "share-net",	0, NULL, OPT_SHARE_NET },
  { "silent",		0, NULL, 's' },
  { "stack",		1, NULL, 'k' },
  { "stderr",		1, NULL, 'r' },
  { "stderr-to-stdout",	0, NULL, OPT_STDERR_TO_STDOUT },
  { "stdin",		1, NULL, 'i' },
  { "stdout",		1, NULL, 'o' },
  { "time",		1, NULL, 't' },
  { "verbose",		0, NULL, 'v' },
  { "version",		0, NULL, OPT_VERSION },
  { "wall-time",	1, NULL, 'w' },
  { NULL,		0, NULL, 0 }
};
|
956 | + | |
|
/*
 * Entry point: parse the command line, check privileges, read the
 * configuration and dispatch to the selected command
 * (--init, --run, --cleanup or --version).
 *
 * Exit status: 0 on success here, 2 from usage() on a command-line error;
 * other paths terminate through die() or from within run().
 */
int
main(int argc, char **argv)
{
  int c;
  int require_cg = 0;		// Set by options that are valid only together with --cg
  char *sep;
  enum opt_code mode = 0;	// Selected command, 0 = none yet

  init_dir_rules();

  while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) >= 0)
    switch (c)
      {
      case 'b':
	box_id = atoi(optarg);
	break;
      case 'c':
	set_cwd = optarg;
	break;
      case OPT_CG:
	cg_enable = 1;
	break;
      case 'd':
	if (!set_dir_action(optarg))
	  usage("Invalid directory specified: %s\n", optarg);
	break;
      case 'D':
	default_dirs = 0;
	break;
      case 'e':
	pass_environ = 1;
	break;
      case 'E':
	if (!set_env_action(optarg))
	  usage("Invalid environment specified: %s\n", optarg);
	break;
      case 'f':
	fsize_limit = atoi(optarg);
	break;
      case 'k':
	stack_limit = atoi(optarg);
	break;
      case 'i':
	redir_stdin = optarg;
	break;
      case 'm':
	memory_limit = atoi(optarg);
	break;
      case 'M':
	meta_open(optarg);
	break;
      case 'o':
	redir_stdout = optarg;
	break;
      case 'p':
	// The argument is optional; without it max_processes becomes 0
	if (optarg)
	  max_processes = atoi(optarg);
	else
	  max_processes = 0;
	break;
      case 'q':
	// Expected form: <blocks>,<inodes>; usage() does not return
	sep = strchr(optarg, ',');
	if (!sep)
	  usage("Invalid quota specified: %s\n", optarg);
	block_quota = atoi(optarg);
	inode_quota = atoi(sep+1);
	break;
      case 'r':
	// --stderr and --stderr-to-stdout cancel each other; the later one wins
	redir_stderr = optarg;
	redir_stderr_to_stdout = 0;
	break;
      case 's':
	silent++;
	break;
      case 't':
	// Time limits are given in seconds (fractions allowed) and stored in milliseconds
	timeout = 1000*atof(optarg);
	break;
      case 'v':
	verbose++;
	break;
      case 'w':
	wall_timeout = 1000*atof(optarg);
	break;
      case 'x':
	extra_timeout = 1000*atof(optarg);
	break;
      case OPT_INIT:
      case OPT_RUN:
      case OPT_CLEANUP:
      case OPT_VERSION:
	// Repeating the same command is tolerated, mixing different ones is not
	if (!mode || (int) mode == c)
	  mode = c;
	else
	  usage("Only one command is allowed.\n");
	break;
      case OPT_CG_MEM:
	cg_memory_limit = atoi(optarg);
	require_cg = 1;
	break;
      case OPT_CG_TIMING:
	cg_timing = 1;
	require_cg = 1;
	break;
      case OPT_NO_CG_TIMING:
	cg_timing = 0;
	require_cg = 1;
	break;
      case OPT_SHARE_NET:
	share_net = 1;
	break;
      case OPT_INHERIT_FDS:
	inherit_fds = 1;
	break;
      case OPT_STDERR_TO_STDOUT:
	redir_stderr = NULL;
	redir_stderr_to_stdout = 1;
	break;
      default:
	usage(NULL);
      }

  if (!mode)
    usage("Please specify an isolate command (e.g. --init, --run).\n");
  // --version is served before the privilege checks and the configuration are touched
  if (mode == OPT_VERSION)
    {
      show_version();
      return 0;
    }

  if (require_cg && !cg_enable)
    usage("Options related to control groups require --cg to be set.\n");

  // The effective UID must be root; the real IDs identify the invoking user
  if (geteuid())
    die("Must be started as root");
  if (getegid() && setegid(0) < 0)
    die("Cannot switch to root group: %m");
  orig_uid = getuid();
  orig_gid = getgid();

  umask(022);
  cf_parse();
  box_init();
  cg_init();

  switch (mode)
    {
    case OPT_INIT:
      if (optind < argc)
	usage("--init mode takes no parameters\n");
      init();
      break;
    case OPT_RUN:
      if (optind >= argc)
	usage("--run mode requires a command to run\n");
      run(argv+optind);
      break;
    case OPT_CLEANUP:
      if (optind < argc)
	usage("--cleanup mode takes no parameters\n");
      cleanup();
      break;
    default:
      die("Internal error: mode mismatch");
    }
  exit(0);
}
@@ -0,0 +1,86 | |||
|
1 | + /* | |
|
2 | + * Process Isolator | |
|
3 | + * | |
|
4 | + * (c) 2012-2017 Martin Mares <mj@ucw.cz> | |
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | |
|
6 | + */ | |
|
7 | + | |
|
8 | + #include <stdarg.h> | |
|
9 | + #include <stdint.h> | |
|
10 | + #include <sys/types.h> | |
|
11 | + | |
|
12 | + #define NONRET __attribute__((noreturn)) | |
|
13 | + #define UNUSED __attribute__((unused)) | |
|
14 | + #define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) | |
|
15 | + | |
|
16 | + /* isolate.c */ | |
|
17 | + | |
|
18 | + void die(char *msg, ...) NONRET; | |
|
19 | + void NONRET __attribute__((format(printf,1,2))) err(char *msg, ...); | |
|
20 | + void __attribute__((format(printf,1,2))) msg(char *msg, ...); | |
|
21 | + | |
|
22 | + extern int pass_environ; | |
|
23 | + extern int verbose; | |
|
24 | + extern int block_quota; | |
|
25 | + extern int inode_quota; | |
|
26 | + extern int cg_enable; | |
|
27 | + extern int cg_memory_limit; | |
|
28 | + extern int cg_timing; | |
|
29 | + | |
|
30 | + extern int box_id; | |
|
31 | + extern uid_t box_uid, orig_uid; | |
|
32 | + extern gid_t box_gid, orig_gid; | |
|
33 | + | |
|
34 | + /* util.c */ | |
|
35 | + | |
|
36 | + void *xmalloc(size_t size); | |
|
37 | + char *xstrdup(char *str); | |
|
38 | + int dir_exists(char *path); | |
|
39 | + void rmtree(char *path); | |
|
40 | + void make_dir(char *path); | |
|
41 | + void chowntree(char *path, uid_t uid, gid_t gid); | |
|
42 | + void close_all_fds(void); | |
|
43 | + | |
|
44 | + void meta_open(const char *name); | |
|
45 | + void meta_close(void); | |
|
46 | + void __attribute__((format(printf,1,2))) meta_printf(const char *fmt, ...); | |
|
47 | + | |
|
48 | + /* rules.c */ | |
|
49 | + | |
|
50 | + int set_env_action(char *a0); | |
|
51 | + char **setup_environment(void); | |
|
52 | + | |
|
53 | + void init_dir_rules(void); | |
|
54 | + int set_dir_action(char *arg); | |
|
55 | + void apply_dir_rules(int with_defaults); | |
|
56 | + | |
|
57 | + void set_quota(void); | |
|
58 | + | |
|
59 | + /* cg.c */ | |
|
60 | + | |
|
61 | + void cg_init(void); | |
|
62 | + void cg_prepare(void); | |
|
63 | + void cg_enter(void); | |
|
64 | + int cg_get_run_time_ms(void); | |
|
65 | + void cg_stats(void); | |
|
66 | + void cg_remove(void); | |
|
67 | + | |
|
68 | + /* config.c */ | |
|
69 | + | |
|
70 | + extern char *cf_box_root; | |
|
71 | + extern char *cf_cg_root; | |
|
72 | + extern char *cf_cg_parent; | |
|
73 | + extern int cf_first_uid; | |
|
74 | + extern int cf_first_gid; | |
|
75 | + extern int cf_num_boxes; | |
|
76 | + | |
|
/* Per-box configuration record; records form a singly linked list. */
struct cf_per_box {
  struct cf_per_box *next;	// Next record in the list
  int box_id;			// ID of the box this record applies to
  char *cpus;			// NOTE(review): presumably the CPU set assigned to the box -- confirm against config.c
  char *mems;			// NOTE(review): presumably the memory-node set assigned to the box -- confirm against config.c
};
|
83 | + | |
|
84 | + void cf_parse(void); | |
|
85 | + struct cf_per_box *cf_per_box(int box_id); | |
|
86 | + struct cf_per_box *cf_current_box(void); |
This diff has been collapsed as it changes many lines, (509 lines changed) Show them Hide them | |||
@@ -0,0 +1,509 | |||
|
1 | + /* | |
|
2 | + * Process Isolator -- Rules | |
|
3 | + * | |
|
4 | + * (c) 2012-2018 Martin Mares <mj@ucw.cz> | |
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | |
|
6 | + */ | |
|
7 | + | |
|
8 | + #include "isolate.h" | |
|
9 | + | |
|
10 | + #include <limits.h> | |
|
11 | + #include <mntent.h> | |
|
12 | + #include <stdio.h> | |
|
13 | + #include <stdlib.h> | |
|
14 | + #include <string.h> | |
|
15 | + #include <sys/capability.h> | |
|
16 | + #include <sys/mount.h> | |
|
17 | + #include <sys/quota.h> | |
|
18 | + #include <sys/stat.h> | |
|
19 | + #include <sys/vfs.h> | |
|
20 | + #include <unistd.h> | |
|
21 | + | |
|
22 | + /*** Environment rules ***/ | |
|
23 | + | |
|
/*
 * One environment rule. User rules are allocated by set_env_action(),
 * built-in ones live in default_env_rules[].
 */
struct env_rule {
  char *var;			// Variable to match
  char *val;			// ""=clear, NULL=inherit
  int var_len;			// strlen(var); (re)computed by setup_environment()
  struct env_rule *next;	// Next rule in the list, NULL at the end
};

// Head of the rule list, in the order the rules were added
static struct env_rule *first_env_rule;
// Address of the link where the next user rule will be appended
static struct env_rule **last_env_rule = &first_env_rule;

// Built-in rules; setup_environment() links them in front of the user rules
static struct env_rule default_env_rules[] = {
  { .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 },
};
|
37 | + | |
|
38 | + int | |
|
39 | + set_env_action(char *a0) | |
|
40 | + { | |
|
41 | + struct env_rule *r = xmalloc(sizeof(*r) + strlen(a0) + 1); | |
|
42 | + char *a = (char *)(r+1); | |
|
43 | + strcpy(a, a0); | |
|
44 | + | |
|
45 | + char *sep = strchr(a, '='); | |
|
46 | + if (sep == a) | |
|
47 | + return 0; | |
|
48 | + r->var = a; | |
|
49 | + if (sep) | |
|
50 | + { | |
|
51 | + *sep++ = 0; | |
|
52 | + r->val = sep; | |
|
53 | + } | |
|
54 | + else | |
|
55 | + r->val = NULL; | |
|
56 | + *last_env_rule = r; | |
|
57 | + last_env_rule = &r->next; | |
|
58 | + r->next = NULL; | |
|
59 | + return 1; | |
|
60 | + } | |
|
61 | + | |
|
62 | + static int | |
|
63 | + match_env_var(char *env_entry, struct env_rule *r) | |
|
64 | + { | |
|
65 | + if (strncmp(env_entry, r->var, r->var_len)) | |
|
66 | + return 0; | |
|
67 | + return (env_entry[r->var_len] == '='); | |
|
68 | + } | |
|
69 | + | |
|
70 | + static void | |
|
71 | + apply_env_rule(char **env, int *env_sizep, struct env_rule *r) | |
|
72 | + { | |
|
73 | + // First remove the variable if already set | |
|
74 | + int pos = 0; | |
|
75 | + while (pos < *env_sizep && !match_env_var(env[pos], r)) | |
|
76 | + pos++; | |
|
77 | + if (pos < *env_sizep) | |
|
78 | + { | |
|
79 | + (*env_sizep)--; | |
|
80 | + env[pos] = env[*env_sizep]; | |
|
81 | + env[*env_sizep] = NULL; | |
|
82 | + } | |
|
83 | + | |
|
84 | + // What is the new value? | |
|
85 | + char *new; | |
|
86 | + if (r->val) | |
|
87 | + { | |
|
88 | + if (!r->val[0]) | |
|
89 | + return; | |
|
90 | + new = xmalloc(r->var_len + 1 + strlen(r->val) + 1); | |
|
91 | + sprintf(new, "%s=%s", r->var, r->val); | |
|
92 | + } | |
|
93 | + else | |
|
94 | + { | |
|
95 | + pos = 0; | |
|
96 | + while (environ[pos] && !match_env_var(environ[pos], r)) | |
|
97 | + pos++; | |
|
98 | + if (!(new = environ[pos])) | |
|
99 | + return; | |
|
100 | + } | |
|
101 | + | |
|
102 | + // Add it at the end of the array | |
|
103 | + env[(*env_sizep)++] = new; | |
|
104 | + env[*env_sizep] = NULL; | |
|
105 | + } | |
|
106 | + | |
|
107 | + char ** | |
|
108 | + setup_environment(void) | |
|
109 | + { | |
|
110 | + // Link built-in rules with user rules | |
|
111 | + for (int i=ARRAY_SIZE(default_env_rules)-1; i >= 0; i--) | |
|
112 | + { | |
|
113 | + default_env_rules[i].next = first_env_rule; | |
|
114 | + first_env_rule = &default_env_rules[i]; | |
|
115 | + } | |
|
116 | + | |
|
117 | + // Scan the original environment | |
|
118 | + char **orig_env = environ; | |
|
119 | + int orig_size = 0; | |
|
120 | + while (orig_env[orig_size]) | |
|
121 | + orig_size++; | |
|
122 | + | |
|
123 | + // For each rule, reserve one more slot and calculate length | |
|
124 | + int num_rules = 0; | |
|
125 | + for (struct env_rule *r = first_env_rule; r; r=r->next) | |
|
126 | + { | |
|
127 | + num_rules++; | |
|
128 | + r->var_len = strlen(r->var); | |
|
129 | + } | |
|
130 | + | |
|
131 | + // Create a new environment | |
|
132 | + char **env = xmalloc((orig_size + num_rules + 1) * sizeof(char *)); | |
|
133 | + int size; | |
|
134 | + if (pass_environ) | |
|
135 | + { | |
|
136 | + memcpy(env, environ, orig_size * sizeof(char *)); | |
|
137 | + size = orig_size; | |
|
138 | + } | |
|
139 | + else | |
|
140 | + size = 0; | |
|
141 | + env[size] = NULL; | |
|
142 | + | |
|
143 | + // Apply the rules one by one | |
|
144 | + for (struct env_rule *r = first_env_rule; r; r=r->next) | |
|
145 | + apply_env_rule(env, &size, r); | |
|
146 | + | |
|
147 | + // Return the new env and pass some gossip | |
|
148 | + if (verbose > 1) | |
|
149 | + { | |
|
150 | + fprintf(stderr, "Passing environment:\n"); | |
|
151 | + for (int i=0; env[i]; i++) | |
|
152 | + fprintf(stderr, "\t%s\n", env[i]); | |
|
153 | + } | |
|
154 | + return env; | |
|
155 | + } | |
|
156 | + | |
|
157 | + /*** Directory rules ***/ | |
|
158 | + | |
|
/*
 * One directory rule: what should appear at "inside" within the box.
 * Rules are created and overridden by add_dir_rule().
 */
struct dir_rule {
  char *inside;			// A relative path
  char *outside;		// This can be an absolute path or a relative path starting with "./"
				// (a filesystem type for DIR_FLAG_FS rules, NULL = bind nothing)
  unsigned int flags;		// DIR_FLAG_xxx
  struct dir_rule *next;	// Next rule in the list, NULL at the end
};

/*
 * The first five values correspond bit-by-bit to dir_flag_names[]:
 * parse_dir_option() maps the name at index i to (1U << i).
 */
enum dir_rule_flags {
  DIR_FLAG_RW = 1,		// Mount read-write (otherwise MS_RDONLY is used)
  DIR_FLAG_NOEXEC = 2,		// Mount with MS_NOEXEC
  DIR_FLAG_FS = 4,		// Mount a filesystem of type "outside" instead of binding a directory
  DIR_FLAG_MAYBE = 8,		// Skip the rule if "outside" is not an existing directory
  DIR_FLAG_DEV = 16,		// Allow devices (otherwise MS_NODEV is used)
  DIR_FLAG_DEFAULT = 1U << 15,	// Used internally: rule added by init_dir_rules()
  DIR_FLAG_DISABLED = 1U << 16,	// Used internally: set by apply_dir_rules() on rules it does not mount
};

// User-visible option names, indexed by bit number of the matching flag
static const char * const dir_flag_names[] = { "rw", "noexec", "fs", "maybe", "dev" };

// Head of the rule list, in the order the rules were first added
static struct dir_rule *first_dir_rule;
// Address of the link where the next new rule will be appended
static struct dir_rule **last_dir_rule = &first_dir_rule;
|
180 | + | |
|
/*
 * Turn a path into a form that is safe to use relative to the box root.
 *
 * Leading slashes are skipped. Returns a pointer into the original string,
 * or NULL if nothing remains or if any component is "..".
 */
static char *
sanitize_dir_path(char *path)
{
  // Skip leading slashes; an empty remainder is not a valid path
  path += strspn(path, "/");
  if (*path == 0)
    return NULL;

  // Walk the '/'-separated components and refuse ".."
  for (char *comp = path; *comp; )
    {
      size_t len = strcspn(comp, "/");
      if (len == 2 && comp[0] == '.' && comp[1] == '.')
        return NULL;

      // Step over the component and the slash after it, if any
      comp += len;
      if (*comp)
        comp++;
    }

  return path;
}
|
207 | + | |
|
208 | + static int | |
|
209 | + add_dir_rule(char *in, char *out, unsigned int flags) | |
|
210 | + { | |
|
211 | + // Make sure that "in" does not try to escape the box | |
|
212 | + in = sanitize_dir_path(in); | |
|
213 | + if (!in) | |
|
214 | + return 0; | |
|
215 | + | |
|
216 | + // Check "out" | |
|
217 | + if (flags & DIR_FLAG_FS) | |
|
218 | + { | |
|
219 | + if (!out || out[0] == '/') | |
|
220 | + return 0; | |
|
221 | + } | |
|
222 | + else | |
|
223 | + { | |
|
224 | + if (out && out[0] != '/' && strncmp(out, "./", 2)) | |
|
225 | + return 0; | |
|
226 | + } | |
|
227 | + | |
|
228 | + // Override an existing rule | |
|
229 | + struct dir_rule *r; | |
|
230 | + for (r = first_dir_rule; r; r = r->next) | |
|
231 | + if (!strcmp(r->inside, in)) | |
|
232 | + break; | |
|
233 | + | |
|
234 | + // Add a new rule | |
|
235 | + if (!r) | |
|
236 | + { | |
|
237 | + r = xmalloc(sizeof(*r)); | |
|
238 | + r->inside = in; | |
|
239 | + *last_dir_rule = r; | |
|
240 | + last_dir_rule = &r->next; | |
|
241 | + r->next = NULL; | |
|
242 | + } | |
|
243 | + r->outside = out; | |
|
244 | + r->flags = flags; | |
|
245 | + return 1; | |
|
246 | + } | |
|
247 | + | |
|
248 | + static unsigned int | |
|
249 | + parse_dir_option(char *opt) | |
|
250 | + { | |
|
251 | + for (unsigned int i = 0; i < ARRAY_SIZE(dir_flag_names); i++) | |
|
252 | + if (!strcmp(opt, dir_flag_names[i])) | |
|
253 | + return 1U << i; | |
|
254 | + die("Unknown directory option %s", opt); | |
|
255 | + } | |
|
256 | + | |
|
/*
 * Parse a directory rule of the form "in[=out][:option]..." and register it.
 *
 * "in" alone is a shorthand for "in=/in"; "in=" (empty out) registers
 * a rule with no outside path. ext_flags are OR-ed into the flags parsed
 * from the options. The argument is copied before it is cut into pieces.
 *
 * Returns the result of add_dir_rule(): 1 on success, 0 on a bad rule.
 */
static int
set_dir_action_ext(char *arg, unsigned int ext_flags)
{
  char *spec = xstrdup(arg);
  unsigned int flags = ext_flags;

  // Cut off the ":option" suffixes one by one and collect their flags
  char *following;
  for (char *opt = strchr(spec, ':'); opt; opt = following)
    {
      *opt++ = 0;
      following = strchr(opt, ':');
      if (following)
        *following = 0;
      flags |= parse_dir_option(opt);
    }

  char *eq = strchr(spec, '=');
  if (!eq)
    {
      // No explicit outside path: bind the directory of the same name
      char *out = xmalloc(1 + strlen(spec) + 1);
      sprintf(out, "/%s", spec);
      return add_dir_rule(spec, out, flags);
    }

  *eq++ = 0;
  return add_dir_rule(spec, (*eq ? eq : NULL), flags);
}
|
287 | + | |
|
/*
 * Register a user-supplied directory rule ("in[=out][:option]...").
 * Returns 1 on success, 0 if the rule was rejected.
 */
int
set_dir_action(char *arg)
{
  return set_dir_action_ext(arg, 0);
}
|
293 | + | |
|
/*
 * Register a built-in directory rule. It is marked with DIR_FLAG_DEFAULT,
 * so that apply_dir_rules() can skip it when defaults are not wanted.
 */
static int
set_dir_action_default(char *arg)
{
  return set_dir_action_ext(arg, DIR_FLAG_DEFAULT);
}
|
299 | + | |
|
/*
 * Register the built-in directory rules. User rules added later
 * for the same inside path override them.
 */
void
init_dir_rules(void)
{
  // Same syntax as user-supplied rules, applied in this order
  static char * const default_rules[] = {
    "box=./box:rw",
    "bin",
    "dev:dev",
    "lib",
    "lib64:maybe",
    "proc=proc:fs",
    "usr",
  };

  for (unsigned int i = 0; i < ARRAY_SIZE(default_rules); i++)
    set_dir_action_default(default_rules[i]);
}
|
311 | + | |
|
312 | + static void | |
|
313 | + set_cap_sys_admin(void) | |
|
314 | + { | |
|
315 | + cap_t caps; | |
|
316 | + if (!(caps = cap_get_proc())) | |
|
317 | + die("Cannot get capabilities: %m"); | |
|
318 | + | |
|
319 | + cap_value_t cap_list[] = { CAP_SYS_ADMIN }; | |
|
320 | + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_SET) < 0) | |
|
321 | + die("Cannot modify capabilities"); | |
|
322 | + | |
|
323 | + if (cap_set_proc(caps) < 0) | |
|
324 | + die("Cannot set capabilities: %m"); | |
|
325 | + | |
|
326 | + cap_free(caps); | |
|
327 | + } | |
|
328 | + | |
|
/*
 * Materialize the directory rules below "root/" (relative to the current
 * working directory): first create all mount points, then perform the mounts.
 *
 * with_defaults -- if zero, rules carrying DIR_FLAG_DEFAULT are skipped.
 *
 * Rules that turn out not to be mountable (no outside path, or a "maybe"
 * rule whose outside directory does not exist) are marked with
 * DIR_FLAG_DISABLED in the first pass and ignored in the second one.
 * Any failure is fatal (die()).
 */
void
apply_dir_rules(int with_defaults)
{
  /*
   * Before mounting anything, we create all mount points inside the box.
   * This is necessary to avoid bypassing directory permissions. If you
   * want nested binds, you have to create the mount points explicitly.
   */
  for (struct dir_rule *r = first_dir_rule; r; r=r->next)
    {
      if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT))
        continue;

      char *in = r->inside;
      char *out = r->outside;

      // A rule with an empty outside path ("in=") only suppresses a binding
      if (!out)
        {
          msg("Not binding anything on %s\n", in);
          r->flags |= DIR_FLAG_DISABLED;
          continue;
        }

      if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out))
        {
          msg("Not binding %s on %s (does not exist)\n", out, r->inside);
          r->flags |= DIR_FLAG_DISABLED;
          continue;
        }

      // NOTE(review): an over-long "in" is silently truncated by snprintf() -- confirm this is acceptable
      char root_in[1024];
      snprintf(root_in, sizeof(root_in), "root/%s", in);
      make_dir(root_in);
    }

  // Second pass: perform the mounts in rule order
  for (struct dir_rule *r = first_dir_rule; r; r=r->next)
    {
      if (r->flags & DIR_FLAG_DISABLED)
        continue;
      if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT))
        continue;

      char *in = r->inside;
      char *out = r->outside;
      char root_in[1024];
      snprintf(root_in, sizeof(root_in), "root/%s", in);

      // Restrictive by default: read-only and no devices unless the rule says otherwise
      unsigned long mount_flags = 0;
      if (!(r->flags & DIR_FLAG_RW))
        mount_flags |= MS_RDONLY;
      if (r->flags & DIR_FLAG_NOEXEC)
        mount_flags |= MS_NOEXEC;
      if (!(r->flags & DIR_FLAG_DEV))
        mount_flags |= MS_NODEV;

      if (r->flags & DIR_FLAG_FS)
        {
          // For FS rules, "out" is the filesystem type, not a path
          msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags);
          if (mount("none", root_in, out, mount_flags, "") < 0)
            die("Cannot mount %s on %s: %m", out, in);
          if (!strcmp(in, "proc"))
            {
              // If we are mounting procfs, add hidepid=2, so that only the processes
              // of the same user are visible. This has to be done as a remount.
              if (mount("none", root_in, out, MS_REMOUNT | mount_flags, "hidepid=2") < 0)
                die("Cannot re-mount proc with hidepid option: %m");
            }
        }
      else
        {
          mount_flags |= MS_BIND | MS_NOSUID;
          msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags);

          /*
           * This is tricky. We cannot run mount() with root privileges, since
           * it could be used to bypass access control if the mounted path
           * contains elements inaccessible to the user running isolate.
           *
           * We switch effective UID and GID back to the calling user (which clears
           * all capabilities, but keeps them in the permitted set) and then
           * enable CAP_SYS_ADMIN. So we have CAP_SYS_ADMIN (needed for mount),
           * but not CAP_DAC_OVERRIDE (which allows to bypass permission checks).
           */

          // (real, effective, saved) = (caller, caller, 0): root stays reachable via the saved IDs
          if (setresuid(orig_uid, orig_uid, 0) < 0 ||
              setresgid(orig_gid, orig_gid, 0) < 0)
            die("Cannot switch UID and GID: %m");

          set_cap_sys_admin();

          // Most mount flags need remount to work
          if (mount(out, root_in, "none", mount_flags, "") < 0 ||
              mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0)
            die("Cannot mount %s on %s: %m", out, in);

          // (real, effective, saved) = (caller, 0, caller): back to effective root
          if (setresuid(orig_uid, 0, orig_uid) < 0 ||
              setresgid(orig_gid, 0, orig_gid) < 0)
            die("Cannot switch UID and GID: %m");
        }
    }
}
|
430 | + | |
|
431 | + /*** Disk quotas ***/ | |
|
432 | + | |
|
/*
 * Check whether "path" lies at or below "with", comparing whole path
 * components: "/dev/sda1" begins with "/dev", but "/devices" does not.
 *
 * A prefix that ends with a slash (most importantly "/") matches everything
 * below it. An empty prefix matches every path.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
path_begins_with(char *path, char *with)
{
  // Last prefix character consumed; starts as '/' so that an empty
  // prefix is treated as matching
  char last = '/';

  while (*with)
    {
      if (*path != *with)
        return 0;
      last = *with;
      path++;
      with++;
    }

  // The prefix is exhausted. Accept only if it ended exactly at a
  // component boundary of "path": either the prefix itself ended with
  // a slash, or the path ends here or continues with a slash.
  return (last == '/' || !*path || *path == '/');
}
|
441 | + | |
|
/*
 * Find the device which holds the filesystem containing "path".
 *
 * Scans /proc/mounts for entries whose source passes
 * path_begins_with(..., "/dev") and whose mount point is a prefix of "path",
 * and picks the one with the longest mount point.
 *
 * Returns a newly allocated copy of the device name (to be freed by
 * the caller), or NULL if no entry matches.
 */
static char *
find_device(char *path)
{
  FILE *mounts = setmntent("/proc/mounts", "r");
  if (!mounts)
    die("Cannot open /proc/mounts: %m");

  char *best_dev = NULL;
  int best_len = 0;
  for (struct mntent *me = getmntent(mounts); me; me = getmntent(mounts))
    {
      if (!path_begins_with(me->mnt_fsname, "/dev") ||
          !path_begins_with(path, me->mnt_dir))
        continue;

      // Keep only the most specific (longest) mount point seen so far
      int len = strlen(me->mnt_dir);
      if (len <= best_len)
        continue;

      free(best_dev);
      best_dev = xstrdup(me->mnt_fsname);
      best_len = len;
    }

  endmntent(mounts);
  return best_dev;
}
|
470 | + | |
|
471 | + void | |
|
472 | + set_quota(void) | |
|
473 | + { | |
|
474 | + if (!block_quota) | |
|
475 | + return; | |
|
476 | + | |
|
477 | + char cwd[PATH_MAX]; | |
|
478 | + if (!getcwd(cwd, sizeof(cwd))) | |
|
479 | + die("getcwd: %m"); | |
|
480 | + | |
|
481 | + char *dev = find_device(cwd); | |
|
482 | + if (!dev) | |
|
483 | + die("Cannot identify filesystem which contains %s", cwd); | |
|
484 | + msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev); | |
|
485 | + | |
|
486 | + // Sanity check | |
|
487 | + struct stat dev_st, cwd_st; | |
|
488 | + if (stat(dev, &dev_st) < 0) | |
|
489 | + die("Cannot identify block device %s: %m", dev); | |
|
490 | + if (!S_ISBLK(dev_st.st_mode)) | |
|
491 | + die("Expected that %s is a block device", dev); | |
|
492 | + if (stat(".", &cwd_st) < 0) | |
|
493 | + die("Cannot stat cwd: %m"); | |
|
494 | + if (cwd_st.st_dev != dev_st.st_rdev) | |
|
495 | + die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev); | |
|
496 | + | |
|
497 | + struct dqblk dq = { | |
|
498 | + .dqb_bhardlimit = block_quota, | |
|
499 | + .dqb_bsoftlimit = block_quota, | |
|
500 | + .dqb_ihardlimit = inode_quota, | |
|
501 | + .dqb_isoftlimit = inode_quota, | |
|
502 | + .dqb_valid = QIF_LIMITS, | |
|
503 | + }; | |
|
504 | + if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &dq) < 0) | |
|
505 | + die("Cannot set disk quota: %m"); | |
|
506 | + msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota); | |
|
507 | + | |
|
508 | + free(dev); | |
|
509 | + } |
@@ -0,0 +1,182 | |||
|
1 | + /* | |
|
2 | + * Process Isolator -- Utility Functions | |
|
3 | + * | |
|
4 | + * (c) 2012-2017 Martin Mares <mj@ucw.cz> | |
|
5 | + * (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au> | |
|
6 | + */ | |
|
7 | + | |
|
8 | + #include "isolate.h" | |
|
9 | + | |
|
10 | + #include <dirent.h> | |
|
11 | + #include <errno.h> | |
|
12 | + #include <ftw.h> | |
|
13 | + #include <stdio.h> | |
|
14 | + #include <stdlib.h> | |
|
15 | + #include <string.h> | |
|
16 | + #include <sys/fsuid.h> | |
|
17 | + #include <sys/stat.h> | |
|
18 | + #include <unistd.h> | |
|
19 | + | |
|
/*
 * malloc() which never returns NULL: running out of memory is reported
 * via die().
 */
void *
xmalloc(size_t size)
{
  void *mem = malloc(size);
  if (mem == NULL)
    die("Out of memory");
  return mem;
}
|
28 | + | |
|
/*
 * strdup() which never returns NULL: running out of memory is reported
 * via die().
 */
char *
xstrdup(char *str)
{
  char *copy = strdup(str);
  if (copy == NULL)
    die("Out of memory");
  return copy;
}
|
37 | + | |
|
/*
 * Returns 1 if "path" can be stat()ed and refers to a directory
 * (symlinks are followed), 0 otherwise.
 */
int
dir_exists(char *path)
{
  struct stat info;

  if (stat(path, &info) < 0)
    return 0;
  return S_ISDIR(info.st_mode) ? 1 : 0;
}
|
44 | + | |
|
/*
 * Create directory "path" together with all missing parent directories
 * (like "mkdir -p"). Dies if a component cannot be created or if the final
 * path exists but is not a directory.
 *
 * The string is modified temporarily while the components are processed,
 * so it must be writable.
 */
void
make_dir(char *path)
{
  // Do not treat the leading slash of an absolute path as a separator
  char *scan = path;
  if (*scan == '/')
    scan++;

  int done = 0;
  while (!done)
    {
      // Terminate the string after the next component
      char *slash = strchr(scan, '/');
      if (slash)
        *slash = 0;
      else
        done = 1;

      if (mkdir(path, 0777) < 0 && errno != EEXIST)
        die("Cannot create directory %s: %m", path);

      // Put the slash back and continue behind it
      if (slash)
        {
          *slash = '/';
          scan = slash + 1;
        }
    }

  // mkdir() above may have returned EEXIST even if the path was not
  // a directory. Ensure that it is.
  struct stat st;
  if (stat(path, &st) < 0)
    die("Cannot stat %s: %m", path);
  if (!S_ISDIR(st.st_mode))
    die("Cannot create %s: already exists, but not a directory", path);
}
|
72 | + | |
|
73 | + | |
|
74 | + static int | |
|
75 | + rmtree_helper(const char *fpath, const struct stat *sb, int typeflag UNUSED, struct FTW *ftwbuf UNUSED) | |
|
76 | + { | |
|
77 | + if (S_ISDIR(sb->st_mode)) | |
|
78 | + { | |
|
79 | + if (rmdir(fpath) < 0) | |
|
80 | + die("Cannot rmdir %s: %m", fpath); | |
|
81 | + } | |
|
82 | + else | |
|
83 | + { | |
|
84 | + if (unlink(fpath) < 0) | |
|
85 | + die("Cannot unlink %s: %m", fpath); | |
|
86 | + } | |
|
87 | + return 0; | |
|
88 | + } | |
|
89 | + | |
|
/*
 * Remove "path" and everything below it.
 *
 * FTW_DEPTH makes nftw() report the contents of a directory before the
 * directory itself, so that rmdir() finds it empty. FTW_PHYS avoids
 * following symlinks and FTW_MOUNT keeps the walk on one filesystem.
 * The return value of nftw() is not checked.
 */
void
rmtree(char *path)
{
  nftw(path, rmtree_helper, 32, FTW_MOUNT | FTW_PHYS | FTW_DEPTH);
}
|
95 | + | |
|
96 | + static uid_t chown_uid; | |
|
97 | + static gid_t chown_gid; | |
|
98 | + | |
|
99 | + static int | |
|
100 | + chowntree_helper(const char *fpath, const struct stat *sb UNUSED, int typeflag UNUSED, struct FTW *ftwbuf UNUSED) | |
|
101 | + { | |
|
102 | + if (lchown(fpath, chown_uid, chown_gid) < 0) | |
|
103 | + die("Cannot chown %s: %m", fpath); | |
|
104 | + else | |
|
105 | + return 0; | |
|
106 | + } | |
|
107 | + | |
|
/*
 * Change the owner and group of "path" and of everything below it
 * to uid:gid. Symlinks are not followed (FTW_PHYS) and the walk stays
 * on one filesystem (FTW_MOUNT).
 *
 * The IDs are handed to the callback through the static variables
 * chown_uid and chown_gid. The return value of nftw() is not checked.
 */
void
chowntree(char *path, uid_t uid, gid_t gid)
{
  chown_uid = uid;
  chown_gid = gid;
  nftw(path, chowntree_helper, 32, FTW_MOUNT | FTW_PHYS);
}
|
115 | + | |
|
// Descriptor of the meta-file (set by meta_open()), -1 if there is none
static int fd_to_keep = -1;

/*
 * Close all open file descriptors of this process except for 0, 1, 2,
 * the descriptor used for scanning /proc/self/fd and fd_to_keep.
 */
void
close_all_fds(void)
{
  DIR *fd_dir = opendir("/proc/self/fd");
  if (!fd_dir)
    die("Cannot open /proc/self/fd: %m");
  int scan_fd = dirfd(fd_dir);

  for (struct dirent *ent = readdir(fd_dir); ent; ent = readdir(fd_dir))
    {
      // Entries which are not plain numbers ("." and "..") are skipped
      char *end;
      long int fd = strtol(ent->d_name, &end, 10);
      if (*end)
        continue;

      int is_std = (fd >= 0 && fd <= 2);
      if (!is_std && fd != scan_fd && fd != fd_to_keep)
        close(fd);
    }

  closedir(fd_dir);
}
|
142 | + | |
|
143 | + /*** Meta-files ***/ | |
|
144 | + | |
|
// The open meta-file: NULL if none, stdout if "-" was requested
static FILE *metafile;

/*
 * Open the meta-file for writing.
 *
 * The name "-" selects stdout. Otherwise the file is created with the
 * filesystem UID temporarily switched to the real UID, so that the
 * permissions of the invoking user apply, and its descriptor is remembered
 * in fd_to_keep to protect it from close_all_fds(). Failures are fatal.
 */
void
meta_open(const char *name)
{
  if (!strcmp(name, "-"))
    {
      metafile = stdout;
      return;
    }
  // NOTE(review): setfsuid(2) is documented to return the previous fsuid
  // rather than an error code, so these "< 0" checks may never trigger -- confirm
  if (setfsuid(getuid()) < 0)
    die("Failed to switch FS UID: %m");
  metafile = fopen(name, "w");
  // Switch back before looking at the result of fopen()
  if (setfsuid(geteuid()) < 0)
    die("Failed to switch FS UID back: %m");
  if (!metafile)
    die("Failed to open metafile '%s'",name);
  fd_to_keep = fileno(metafile);
}
|
164 | + | |
|
165 | + void | |
|
166 | + meta_close(void) | |
|
167 | + { | |
|
168 | + if (metafile && metafile != stdout) | |
|
169 | + fclose(metafile); | |
|
170 | + } | |
|
171 | + | |
|
172 | + void | |
|
173 | + meta_printf(const char *fmt, ...) | |
|
174 | + { | |
|
175 | + if (!metafile) | |
|
176 | + return; | |
|
177 | + | |
|
178 | + va_list args; | |
|
179 | + va_start(args, fmt); | |
|
180 | + vfprintf(metafile, fmt, args); | |
|
181 | + va_end(args); | |
|
182 | + } |
@@ -74,21 +74,25 | |||
|
74 | 74 | |
|
75 | 75 | if !FileTest.exist?(problem_home) |
|
76 | 76 | puts "PROBLEM DIR: #{problem_home}" |
|
77 | 77 | raise "engine: No test data." |
|
78 | 78 | end |
|
79 | 79 | |
|
80 | + talk "ENGINE: grading dir at #{grading_dir} is created" | |
|
81 | + | |
|
80 | 82 | # copy the source script, using lock |
|
81 | 83 | dinit = DirInit::Manager.new(problem_home) |
|
82 | 84 | |
|
83 | 85 | # lock the directory and copy the scripts |
|
84 | 86 | dinit.setup do |
|
85 | 87 | copy_log = copy_script(problem_home) |
|
86 | 88 | save_copy_log(problem_home,copy_log) |
|
89 | + talk "ENGINE: following std script is copied: #{copy_log.join ' '}" | |
|
87 | 90 | end |
|
88 | 91 | |
|
92 | + | |
|
89 | 93 | call_judge(problem_home,language,grading_dir,source_name) |
|
90 | 94 | |
|
91 | 95 | @reporter.report(submission,"#{grading_dir}/test-result") |
|
92 | 96 | |
|
93 | 97 | # unlock the directory |
|
94 | 98 | dinit.teardown do |
@@ -118,17 +122,16 | |||
|
118 | 122 | #change directory to problem_home |
|
119 | 123 | #call the "judge" script |
|
120 | 124 | def call_judge(problem_home,language,grading_dir,fname) |
|
121 | 125 | ENV['PROBLEM_HOME'] = problem_home |
|
122 | 126 | ENV['RUBYOPT'] = '' |
|
123 | 127 | |
|
124 | - talk grading_dir | |
|
125 | 128 | Dir.chdir grading_dir |
|
126 | 129 | script_name = "#{problem_home}/script/judge" |
|
127 | 130 | cmd = "#{script_name} #{language} #{fname}" |
|
128 |
- talk " |
|
|
131 | + talk "ENGINE: Calling Judge at #{cmd}" | |
|
129 | 132 | warn "ERROR: file does not exists #{script_name}" unless File.exists? script_name |
|
130 | 133 | system(cmd) |
|
131 | 134 | end |
|
132 | 135 | |
|
133 | 136 | def get_std_script_dir |
|
134 | 137 | GRADER_ROOT + '/std-script' |
@@ -49,13 +49,13 | |||
|
49 | 49 | end |
|
50 | 50 | end |
|
51 | 51 | end |
|
52 | 52 | end |
|
53 | 53 | |
|
    # Announces the submission on stdout and hands it over to the engine.
    # Returns whatever @engine.grade returns.
    def grade_submission(submission)
      # NOTE(review): try() is presumably ActiveSupport's Object#try, so a
      # submission without a user would print an empty name -- confirm
      puts "RUNNER: grade submission: #{submission.id} by #{submission.try(:user).try(:full_name)}"
      @engine.grade(submission)
    end
|
57 | 57 | end |
|
58 | 58 | |
|
59 | 59 | def grade_oldest_test_request |
|
60 | 60 | test_request = TestRequest.get_inqueue_and_change_status(Task::STATUS_GRADING) |
|
61 | 61 | if test_request!=nil |
@@ -64,49 +64,54 | |||
|
64 | 64 | index, default = PARAMS[param_name] |
|
65 | 65 | if ARGV.length > index |
|
66 | 66 | params[param_name] = ARGV[index] |
|
67 | 67 | else |
|
68 | 68 | params[param_name] = default |
|
69 | 69 | end |
|
70 | - talk "#{param_name}: #{params[param_name]}" | |
|
70 | + talk "COMPILE: param: #{param_name}: #{params[param_name]}" | |
|
71 | 71 | end |
|
72 | + talk "COMPILE: working dir = " + Dir.pwd | |
|
72 | 73 | |
|
73 | 74 | # Remove any remaining output files or message files. |
|
74 | 75 | if FileTest.exists? params[:output_file] |
|
75 | 76 | FileUtils.rm(params[:output_file]) |
|
76 | 77 | end |
|
77 | 78 | if FileTest.exists? params[:message_file] |
|
78 | 79 | FileUtils.rm(params[:message_file]) |
|
79 | 80 | end |
|
80 | 81 | |
|
81 | 82 | # Check if the source file exists before attempt compiling. |
|
82 | 83 | if !FileTest.exists? params[:source_file] |
|
83 | - talk("ERROR: The source file does not exist!") | |
|
84 | + talk("COMPILE: ERROR: The source file does not exist!") | |
|
84 | 85 | open(params[:message_file],"w") do |f| |
|
85 | 86 | f.puts "ERROR: The source file did not exist." |
|
86 | 87 | end |
|
87 | 88 | exit(127) |
|
88 | 89 | end |
|
89 | 90 | |
|
90 | 91 | if params[:prog_lang]=='cpp' |
|
91 | 92 | params[:prog_lang] = 'c++' |
|
92 | 93 | end |
|
93 | 94 | |
|
95 | + | |
|
94 | 96 | # Compile. |
|
95 | 97 | case params[:prog_lang] |
|
96 | 98 | |
|
97 | 99 | when "c" |
|
98 | 100 | command = "#{C_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{C_OPTIONS}" |
|
101 | + talk "COMPILE: compiling command [#{command}]" | |
|
99 | 102 | system(command, err: params[:message_file]) |
|
100 | 103 | |
|
101 | 104 | when "c++" |
|
102 | 105 | command = "#{CPLUSPLUS_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{CPLUSPLUS_OPTIONS}" |
|
106 | + talk "COMPILE: compiling command [#{command}]" | |
|
103 | 107 | system(command, err: params[:message_file]) |
|
104 | 108 | |
|
105 | 109 | when "pas" |
|
106 | 110 | command = "#{PASCAL_COMPILER} #{params[:source_file]} -ooutpas #{PASCAL_OPTIONS}" |
|
111 | + talk "COMPILE: compiling command [#{command}]" | |
|
107 | 112 | system(command,out: params[:message_file]) |
|
108 | 113 | FileUtils.mv("output", params[:output_file]) |
|
109 | 114 | |
|
110 | 115 | when "java" |
|
111 | 116 | #rename the file to the public class name |
|
112 | 117 | |
@@ -123,22 +128,24 | |||
|
123 | 128 | source.each do |s| |
|
124 | 129 | file.puts s |
|
125 | 130 | end |
|
126 | 131 | end |
|
127 | 132 | #system("cp #{params[:source_file]} #{classname}.java") |
|
128 | 133 | command = "#{JAVA_COMPILER} -encoding utf8 #{classname}.java" |
|
134 | + talk "COMPILE: compiling command [#{command}]" | |
|
129 | 135 | system(command, err: params[:message_file]) |
|
130 | 136 | if File.exists?(classname + ".class") |
|
131 | 137 | File.open(params[:output_file],"w") {|file| file.write("#{classname}")} |
|
132 | 138 | end |
|
133 | 139 | if classname == 'DUMMY' |
|
134 | 140 | File.open(params[:message_file],"w") {|file| file.write("Cannot find any public class in the source code\n")} |
|
135 | 141 | end |
|
136 | 142 | |
|
137 | 143 | when "ruby" |
|
138 | 144 | command = "#{RUBY_INTERPRETER} -c #{params[:source_file]}" |
|
145 | + talk "COMPILE: compiling command [#{command}]" | |
|
139 | 146 | if system(command, err: params[:message_file]) |
|
140 | 147 | File.open(params[:output_file],"w") do |out_file| |
|
141 | 148 | out_file.puts "#!#{RUBY_INTERPRETER}" |
|
142 | 149 | File.open(params[:source_file],"r").each do |line| |
|
143 | 150 | out_file.print line |
|
144 | 151 | end |
@@ -148,17 +155,15 | |||
|
148 | 155 | |
|
149 | 156 | when "python" |
|
150 | 157 | #command = "#{PYTHON_CHECKER} #{params[:source_file]}" |
|
151 | 158 | #if system(command, out: params[:message_file]) |
|
152 | 159 | #compile to python bytecode |
|
153 | 160 | command = "#{PYTHON_INTERPRETER} -c \"import py_compile; py_compile.compile('#{params[:source_file]}','#{params[:source_file]}c');\"" |
|
154 |
- |
|
|
161 | + talk "COMPILE: compiling command [#{command}]" | |
|
155 | 162 | system(command, err: params[:message_file]) |
|
156 | 163 | if FileTest.exists?("#{params[:source_file]}c") |
|
157 | - puts "pwd: " + Dir.pwd | |
|
158 | - Dir.new('.').each {|file| puts file} | |
|
159 | 164 | File.open(params[:output_file],"w") do |out_file| |
|
160 | 165 | out_file.puts "#!#{PYTHON_INTERPRETER} #{params[:source_file]}c" |
|
161 | 166 | end |
|
162 | 167 | File.chmod(0755, params[:output_file]) |
|
163 | 168 | FileUtils.cp("#{params[:source_file]}c",params[:output_file]) |
|
164 | 169 | end |
@@ -175,22 +180,23 | |||
|
175 | 180 | end |
|
176 | 181 | File.chmod(0755, params[:output_file]) |
|
177 | 182 | end |
|
178 | 183 | |
|
179 | 184 | when "haskell" |
|
180 | 185 | command = "#{HASKELL_COMPILER} #{params[:source_file]} -o #{params[:output_file]} #{HASKELL_OPTIONS}" |
|
186 | + talk "COMPILE: compiling command [#{command}]" | |
|
181 | 187 | system(command, err: params[:message_file]) |
|
182 | 188 | |
|
183 | 189 | else |
|
184 | - talk("ERROR: Invalid language specified!") | |
|
190 | + talk("COMPILE: ERROR: Invalid language specified!") | |
|
185 | 191 | open(params[:message_file],"w") do |f| |
|
186 | 192 | f.puts "ERROR: Invalid language specified!" |
|
187 | 193 | end |
|
188 | 194 | exit(127) |
|
189 | 195 | end |
|
190 | 196 | |
|
191 | 197 | # Report success or failure. |
|
192 | 198 | if FileTest.exists? params[:output_file] |
|
193 |
- talk "Compilation was successful!" |
|
|
199 | + talk "COMPILE: Compilation was successful!" | |
|
194 | 200 | else |
|
195 | - talk "ERROR: Something was wrong during the compilation!" | |
|
201 | + talk "COMPILE: ERROR: Something was wrong during the compilation!" | |
|
196 | 202 | end |
@@ -25,13 +25,13 | |||
|
25 | 25 | end |
|
26 | 26 | |
|
# Runs the given block and returns its value. If the block raises
# a StandardError, logs "JUDGE: ERROR: <error_message>" and raises
# a RuntimeError carrying the same text.
def call_and_log(error_message)
  yield
rescue
  failure = "JUDGE: ERROR: #{error_message}"
  log failure
  raise failure
end
|
36 | 36 | |
|
37 | 37 | def clear_and_create_empty_dir(dir) |
@@ -51,92 +51,98 | |||
|
51 | 51 | puts "WARNING: The judge script will forcefully create the (implicitly and explicitly) specified directories and remove anything inside it." |
|
52 | 52 | exit(127) |
|
53 | 53 | end |
|
54 | 54 | |
|
55 | 55 | language = ARGV[0] |
|
56 | 56 | if language != "c" && language != "c++" && language != "pas" && language != "java" && language != "ruby" && language != "python" && language != "php" && language != "haskell" |
|
57 | - log "You specified a language that is not supported: #{language}." | |
|
57 | + log "JUDGE: You specified a language that is not supported: #{language}." | |
|
58 | 58 | exit(127) |
|
59 | 59 | end |
|
60 | 60 | |
|
61 | 61 | source_file = ARGV[1] |
|
62 | 62 | ENV['SOURCE_NAME'] = source_file |
|
63 | 63 | if File.exist?(source_file) == false |
|
64 | - log "The source file does not exist." | |
|
64 | + log "JUDGE: The source file does not exist." | |
|
65 | 65 | exit(127) |
|
66 | 66 | end |
|
67 | 67 | |
|
68 | - log "Making test result and sandbox directories..." | |
|
68 | + log "JUDGE: Making test result and sandbox directories..." | |
|
69 | 69 | |
|
70 | 70 | current_dir = FileUtils.pwd |
|
71 | 71 | current_dir.strip! |
|
72 | 72 | |
|
73 | 73 | if ARGV.length >= 3 |
|
74 | 74 | test_result_dir = ARGV[2] |
|
75 | 75 | else |
|
76 | 76 | test_result_dir = "#{current_dir}/test-result" |
|
77 | 77 | end |
|
78 | 78 | |
|
79 | - log "Test result directory: #{test_result_dir}" | |
|
79 | + log "JUDGE: Test result directory: #{test_result_dir}" | |
|
80 | 80 | clear_and_create_empty_dir(test_result_dir) |
|
81 | 81 | |
|
82 | 82 | if ARGV.length >= 4 |
|
83 | 83 | sandbox_dir = ARGV[3] |
|
84 | 84 | else |
|
85 | 85 | sandbox_dir = "#{current_dir}/sandbox" |
|
86 | 86 | end |
|
87 | - log "Sandbox directory: #{sandbox_dir}" | |
|
87 | + log "JUDGE: Sandbox directory: #{sandbox_dir}" | |
|
88 | 88 | clear_and_create_empty_dir(sandbox_dir) |
|
89 | 89 | |
|
90 | + # ------------------------------ | |
|
90 | 91 | # Compile |
|
92 | + # ------------------------------ | |
|
93 | + log "JUDGE: Compiling..." | |
|
91 | 94 | log |
|
92 | - log "Compiling..." | |
|
93 | 95 | call_and_log("Cannot copy the source file to #{sandbox_dir}") { |
|
94 | 96 | FileUtils.cp(source_file, sandbox_dir) |
|
95 | 97 | } |
|
96 | 98 | begin |
|
97 | 99 | Dir.chdir sandbox_dir |
|
98 | 100 | rescue |
|
99 | - log "ERROR: Cannot change directory to #{sandbox_dir}." | |
|
101 | + log "JUDGE: ERROR: Cannot change directory to #{sandbox_dir}." | |
|
100 | 102 | exit(127) |
|
101 | 103 | end |
|
102 | 104 | execute("#{problem_home}/script/compile #{language} #{source_file}", "Compilation error!") |
|
103 | 105 | compile_message = open("compiler_message").read |
|
104 | 106 | compile_message.strip! |
|
105 | 107 | call_and_log("Cannot move the compiler message to #{test_result_dir}.") { |
|
106 | 108 | FileUtils.mv("compiler_message", test_result_dir) |
|
107 | 109 | } |
|
108 | 110 | if !FileTest.exist?("a.out") |
|
109 | - log "Cannot compile the source code. See message in #{test_result_dir}/compile_message" | |
|
111 | + log "JUDGE: ERROR: Cannot compile the source code. See message in #{test_result_dir}/compile_message" | |
|
110 | 112 | exit(127) |
|
111 | 113 | else |
|
112 | 114 | call_and_log("Cannot move the compiled program to #{test_result_dir}") { |
|
113 | 115 | FileUtils.mv("a.out",test_result_dir) |
|
114 | 116 | if language == "java" then Dir["*.class"].each { |file| FileUtils.mv(file,test_result_dir)} end |
|
115 | 117 | if language == "python" then Dir["*.pyc"].each { |file| FileUtils.mv(file,test_result_dir)} end |
|
116 | 118 | } |
|
117 | 119 | FileUtils.rm_rf("#{sandbox_dir}/.") |
|
118 | 120 | end |
|
119 | 121 | |
|
122 | + | |
|
123 | + #----------------------------------------------- | |
|
124 | + # run | |
|
125 | + #----------------------------------------------- | |
|
120 | 126 | require "#{problem_home}/script/test_dsl.rb" |
|
121 | 127 | load "#{problem_home}/test_cases/all_tests.cfg" |
|
122 | 128 | problem = Problem.get_instance |
|
123 | 129 | |
|
124 | 130 | if problem.well_formed? == false |
|
125 | 131 | log "The problem specification is not well formed." |
|
126 | 132 | exit(127) |
|
127 | 133 | end |
|
128 | 134 | |
|
129 | 135 | # Doing the testing. |
|
136 | + log | |
|
137 | + log "JUDGE: Running each test case..." | |
|
130 | 138 | (1..(problem.num_tests)).each do |test_num| |
|
131 | 139 | |
|
132 | 140 | $stdout.print "[#{test_num}]" |
|
133 | 141 | $stdout.flush |
|
134 | 142 | |
|
135 | - log "Test number: #{test_num}" | |
|
136 | - | |
|
137 | 143 | call_and_log("Cannot copy the compiled program into #{sandbox_dir}") { |
|
138 | 144 | FileUtils.cp("#{test_result_dir}/a.out", sandbox_dir, :preserve => true) |
|
139 | 145 | if language == "java" then Dir["#{test_result_dir}/*.class"].each { |file| FileUtils.cp(file,sandbox_dir)} end |
|
140 | 146 | if language == "python" then Dir["#{test_result_dir}/*.pyc"].each { |file| FileUtils.cp(file,sandbox_dir)} end |
|
141 | 147 | } |
|
142 | 148 | |
@@ -170,13 +176,13 | |||
|
170 | 176 | end |
|
171 | 177 | |
|
172 | 178 | $stdout.print "[done]\n" |
|
173 | 179 | |
|
174 | 180 | # Grade |
|
175 | 181 | log |
|
176 | - log "Grading..." | |
|
182 | + log "JUDGE: Grading..." | |
|
177 | 183 | begin |
|
178 | 184 | Dir.chdir test_result_dir |
|
179 | 185 | rescue |
|
180 | 186 | log "ERROR: Cannot change directory to #{test_result_dir}." |
|
181 | 187 | exit(127) |
|
182 | 188 | end |
@@ -48,19 +48,19 | |||
|
48 | 48 | load "#{problem_home}/test_cases/all_tests.cfg" |
|
49 | 49 | problem = Problem.get_instance |
|
50 | 50 | |
|
51 | 51 | sandbox_dir = Dir.getwd |
|
52 | 52 | |
|
53 | 53 | if problem.well_formed? == false |
|
54 | - log "The problem specification is not well formed." | |
|
54 | + log "RUN: The problem specification is not well formed." | |
|
55 | 55 | exit(127) |
|
56 | 56 | end |
|
57 | 57 | |
|
58 | 58 | # Check if the test number is okay. |
|
59 | 59 | if test_num <= 0 || test_num > problem.num_tests |
|
60 | - log "You have specified a wrong test number." | |
|
60 | + log "RUN: You have specified a wrong test number." | |
|
61 | 61 | exit(127) |
|
62 | 62 | end |
|
63 | 63 | |
|
64 | 64 | ##################################### |
|
65 | 65 | # Set the relevant file names here. # |
|
66 | 66 | ##################################### |
@@ -116,14 +116,14 | |||
|
116 | 116 | run_command = "#{problem_home}/script/box -a 2 -f -T -t #{time_limit*=2} -m #{[512 * 1024,mem_limit].max} #{PHP_OPTION} -i #{input_file_name} -o output.txt /usr/bin/php -A -d -A memory_limit=#{mem_limit}k -A #{program_name} " |
|
117 | 117 | else # for c++, pascal, we do the normal checking |
|
118 | 118 | run_command = "#{problem_home}/script/box -a 2 -f -T -t #{time_limit} -m #{mem_limit} -i #{input_file_name} -o output.txt #{program_name} " |
|
119 | 119 | end |
|
120 | 120 | |
|
121 | 121 | |
|
122 | - log "Running test #{test_num}..." | |
|
123 | - log run_command | |
|
122 | + log "RUN: Running test #{test_num}..." | |
|
123 | + log "RUN: Run command = [#{run_command}]" | |
|
124 | 124 | log |
|
125 | 125 | system(run_command,err: 'run_result') |
|
126 | 126 | |
|
127 | 127 | # Restore PROBLEM_HOME |
|
128 | 128 | ENV['PROBLEM_HOME'] = problem_home |
|
129 | 129 |
You need to be logged in to leave comments.
Login now