|
|
/*
|
|
|
* Process Isolator -- Rules
|
|
|
*
|
|
|
* (c) 2012-2018 Martin Mares <mj@ucw.cz>
|
|
|
* (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au>
|
|
|
*/
|
|
|
|
|
|
#include "isolate.h"
|
|
|
|
|
|
#include <limits.h>
|
|
|
#include <mntent.h>
|
|
|
#include <stdio.h>
|
|
|
#include <stdlib.h>
|
|
|
#include <string.h>
|
|
|
#include <sys/capability.h>
|
|
|
#include <sys/mount.h>
|
|
|
#include <sys/quota.h>
|
|
|
#include <sys/stat.h>
|
|
|
#include <sys/vfs.h>
|
|
|
#include <unistd.h>
|
|
|
|
|
|
/*** Environment rules ***/
|
|
|
|
|
|
/*
 * A single environment rule. Rules are kept in a singly linked list
 * and setup_environment() applies them in list order.
 */
struct env_rule {
  char *var;			// Name of the variable the rule applies to
  char *val;			// Value to set; "" removes the variable, NULL inherits it
  int var_len;			// Length of var (computed by setup_environment())
  struct env_rule *next;	// Following rule, NULL at the end of the list
};
|
|
|
|
|
|
static struct env_rule *first_env_rule;
|
|
|
static struct env_rule **last_env_rule = &first_env_rule;
|
|
|
|
|
|
static struct env_rule default_env_rules[] = {
|
|
|
{ .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 },
|
|
|
};
|
|
|
|
|
|
int
|
|
|
set_env_action(char *a0)
|
|
|
{
|
|
|
struct env_rule *r = xmalloc(sizeof(*r) + strlen(a0) + 1);
|
|
|
char *a = (char *)(r+1);
|
|
|
strcpy(a, a0);
|
|
|
|
|
|
char *sep = strchr(a, '=');
|
|
|
if (sep == a)
|
|
|
return 0;
|
|
|
r->var = a;
|
|
|
if (sep)
|
|
|
{
|
|
|
*sep++ = 0;
|
|
|
r->val = sep;
|
|
|
}
|
|
|
else
|
|
|
r->val = NULL;
|
|
|
*last_env_rule = r;
|
|
|
last_env_rule = &r->next;
|
|
|
r->next = NULL;
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
match_env_var(char *env_entry, struct env_rule *r)
|
|
|
{
|
|
|
if (strncmp(env_entry, r->var, r->var_len))
|
|
|
return 0;
|
|
|
return (env_entry[r->var_len] == '=');
|
|
|
}
|
|
|
|
|
|
static void
|
|
|
apply_env_rule(char **env, int *env_sizep, struct env_rule *r)
|
|
|
{
|
|
|
// First remove the variable if already set
|
|
|
int pos = 0;
|
|
|
while (pos < *env_sizep && !match_env_var(env[pos], r))
|
|
|
pos++;
|
|
|
if (pos < *env_sizep)
|
|
|
{
|
|
|
(*env_sizep)--;
|
|
|
env[pos] = env[*env_sizep];
|
|
|
env[*env_sizep] = NULL;
|
|
|
}
|
|
|
|
|
|
// What is the new value?
|
|
|
char *new;
|
|
|
if (r->val)
|
|
|
{
|
|
|
if (!r->val[0])
|
|
|
return;
|
|
|
new = xmalloc(r->var_len + 1 + strlen(r->val) + 1);
|
|
|
sprintf(new, "%s=%s", r->var, r->val);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
pos = 0;
|
|
|
while (environ[pos] && !match_env_var(environ[pos], r))
|
|
|
pos++;
|
|
|
if (!(new = environ[pos]))
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
// Add it at the end of the array
|
|
|
env[(*env_sizep)++] = new;
|
|
|
env[*env_sizep] = NULL;
|
|
|
}
|
|
|
|
|
|
char **
|
|
|
setup_environment(void)
|
|
|
{
|
|
|
// Link built-in rules with user rules
|
|
|
for (int i=ARRAY_SIZE(default_env_rules)-1; i >= 0; i--)
|
|
|
{
|
|
|
default_env_rules[i].next = first_env_rule;
|
|
|
first_env_rule = &default_env_rules[i];
|
|
|
}
|
|
|
|
|
|
// Scan the original environment
|
|
|
char **orig_env = environ;
|
|
|
int orig_size = 0;
|
|
|
while (orig_env[orig_size])
|
|
|
orig_size++;
|
|
|
|
|
|
// For each rule, reserve one more slot and calculate length
|
|
|
int num_rules = 0;
|
|
|
for (struct env_rule *r = first_env_rule; r; r=r->next)
|
|
|
{
|
|
|
num_rules++;
|
|
|
r->var_len = strlen(r->var);
|
|
|
}
|
|
|
|
|
|
// Create a new environment
|
|
|
char **env = xmalloc((orig_size + num_rules + 1) * sizeof(char *));
|
|
|
int size;
|
|
|
if (pass_environ)
|
|
|
{
|
|
|
memcpy(env, environ, orig_size * sizeof(char *));
|
|
|
size = orig_size;
|
|
|
}
|
|
|
else
|
|
|
size = 0;
|
|
|
env[size] = NULL;
|
|
|
|
|
|
// Apply the rules one by one
|
|
|
for (struct env_rule *r = first_env_rule; r; r=r->next)
|
|
|
apply_env_rule(env, &size, r);
|
|
|
|
|
|
// Return the new env and pass some gossip
|
|
|
if (verbose > 1)
|
|
|
{
|
|
|
fprintf(stderr, "Passing environment:\n");
|
|
|
for (int i=0; env[i]; i++)
|
|
|
fprintf(stderr, "\t%s\n", env[i]);
|
|
|
}
|
|
|
return env;
|
|
|
}
|
|
|
|
|
|
/*** Directory rules ***/
|
|
|
|
|
|
/*
 * A single directory rule: what should appear at a given place inside
 * the box. Rules are kept in a singly linked list.
 */
struct dir_rule {
  char *inside;			// Path inside the box, relative (leading slashes are stripped)
  char *outside;		// Absolute path or a relative one starting with "./";
				// a filesystem type for "fs" rules; NULL = bind nothing
  unsigned int flags;		// Combination of DIR_FLAG_xxx
  struct dir_rule *next;	// Following rule, NULL at the end of the list
};
|
|
|
|
|
|
/*
 * Flags of directory rules. The low bits correspond one-to-one to the
 * option names in dir_flag_names[] (bit i <-> name i), so the two must be
 * kept in the same order.
 */
enum dir_rule_flags {
  DIR_FLAG_RW = 1U << 0,	// Mount read-write (the default is read-only)
  DIR_FLAG_NOEXEC = 1U << 1,	// Mount with MS_NOEXEC
  DIR_FLAG_FS = 1U << 2,	// Mount a filesystem instead of binding a directory
  DIR_FLAG_MAYBE = 1U << 3,	// Skip the rule if the outside directory does not exist
  DIR_FLAG_DEV = 1U << 4,	// Allow device files (the default is MS_NODEV)
  DIR_FLAG_DEFAULT = 1U << 15,	// Used internally: rule added by init_dir_rules()
  DIR_FLAG_DISABLED = 1U << 16,	// Used internally: rule skipped by apply_dir_rules()
};
|
|
|
|
|
|
// Option names accepted in directory rules; the name at index i selects flag (1U << i)
static const char * const dir_flag_names[] = {
  "rw",
  "noexec",
  "fs",
  "maybe",
  "dev",
};
|
|
|
|
|
|
// Head of the directory rule list and the link where the next new rule gets attached
static struct dir_rule *first_dir_rule;
static struct dir_rule **last_dir_rule = &first_dir_rule;
|
|
|
|
|
|
/*
 * Turn a path into a form which cannot point outside the box: leading
 * slashes are skipped and any ".." component makes the path invalid.
 *
 * Returns a pointer into the original string (past the leading slashes),
 * or NULL if nothing is left or a ".." component is present.
 */
static char *
sanitize_dir_path(char *path)
{
  // Skip all leading slashes; an empty remainder is not a usable path
  path += strspn(path, "/");
  if (*path == '\0')
    return NULL;

  // Walk over the slash-separated components and refuse ".."
  char *comp = path;
  while (*comp)
    {
      size_t len = strcspn(comp, "/");
      if (len == 2 && comp[0] == '.' && comp[1] == '.')
	return NULL;

      comp += len;
      if (*comp == '/')
	comp++;
    }

  return path;
}
|
|
|
|
|
|
static int
|
|
|
add_dir_rule(char *in, char *out, unsigned int flags)
|
|
|
{
|
|
|
// Make sure that "in" does not try to escape the box
|
|
|
in = sanitize_dir_path(in);
|
|
|
if (!in)
|
|
|
return 0;
|
|
|
|
|
|
// Check "out"
|
|
|
if (flags & DIR_FLAG_FS)
|
|
|
{
|
|
|
if (!out || out[0] == '/')
|
|
|
return 0;
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
if (out && out[0] != '/' && strncmp(out, "./", 2))
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
// Override an existing rule
|
|
|
struct dir_rule *r;
|
|
|
for (r = first_dir_rule; r; r = r->next)
|
|
|
if (!strcmp(r->inside, in))
|
|
|
break;
|
|
|
|
|
|
// Add a new rule
|
|
|
if (!r)
|
|
|
{
|
|
|
r = xmalloc(sizeof(*r));
|
|
|
r->inside = in;
|
|
|
*last_dir_rule = r;
|
|
|
last_dir_rule = &r->next;
|
|
|
r->next = NULL;
|
|
|
}
|
|
|
r->outside = out;
|
|
|
r->flags = flags;
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
static unsigned int
|
|
|
parse_dir_option(char *opt)
|
|
|
{
|
|
|
for (unsigned int i = 0; i < ARRAY_SIZE(dir_flag_names); i++)
|
|
|
if (!strcmp(opt, dir_flag_names[i]))
|
|
|
return 1U << i;
|
|
|
die("Unknown directory option %s", opt);
|
|
|
}
|
|
|
|
|
|
/*
 * Parse a directory rule of the form "in[=out][:option]..." and add it.
 *
 *   in			path inside the box
 *   in=out		put "out" there
 *   in=		put nothing there (cancels a rule for "in")
 *   in (no '=')	put "/in" there
 *
 * Every option is looked up by parse_dir_option() and OR-ed with ext_flags.
 *
 * The argument is duplicated; on success the rule keeps pointers into the
 * duplicate, so it is intentionally never freed. On failure all buffers
 * allocated here are released.
 *
 * Returns 1 on success, 0 if the rule is invalid.
 */
static int
set_dir_action_ext(char *arg, unsigned int ext_flags)
{
  // Work on a private copy, which is cut into pieces below
  arg = xstrdup(arg);

  // Split off the options and collect their flags
  char *colon = strchr(arg, ':');
  unsigned int flags = ext_flags;
  while (colon)
    {
      *colon++ = 0;
      char *next = strchr(colon, ':');
      if (next)
	*next = 0;
      flags |= parse_dir_option(colon);
      colon = next;
    }

  // Work out the outside part
  char *out;
  char *out_buf = NULL;		// Set only if "out" had to be allocated separately
  char *eq = strchr(arg, '=');
  if (eq)
    {
      *eq++ = 0;
      out = *eq ? eq : NULL;
    }
  else
    {
      out_buf = xmalloc(1 + strlen(arg) + 1);
      sprintf(out_buf, "/%s", arg);
      out = out_buf;
    }

  int ok = add_dir_rule(arg, out, flags);
  if (!ok)
    {
      // A rejected rule stores none of our pointers, so nothing refers to these
      free(out_buf);
      free(arg);
    }
  return ok;
}
|
|
|
|
|
|
/*
 * Add a directory rule given as "in[=out][:option]...".
 * Returns 1 on success, 0 if the rule is invalid.
 */
int
set_dir_action(char *arg)
{
  const unsigned int no_extra_flags = 0;

  return set_dir_action_ext(arg, no_extra_flags);
}
|
|
|
|
|
|
static int
|
|
|
set_dir_action_default(char *arg)
|
|
|
{
|
|
|
return set_dir_action_ext(arg, DIR_FLAG_DEFAULT);
|
|
|
}
|
|
|
|
|
|
/*
 * Register the built-in directory rules. They are added in the order
 * listed below and all of them carry DIR_FLAG_DEFAULT.
 */
void
init_dir_rules(void)
{
  static char * const default_rules[] = {
    "box=./box:rw",
    "bin",
    "dev:dev",
    "lib",
    "lib64:maybe",
    "proc=proc:fs",
    "usr",
  };
  const unsigned int count = sizeof(default_rules) / sizeof(default_rules[0]);

  for (unsigned int i = 0; i < count; i++)
    set_dir_action_default(default_rules[i]);
}
|
|
|
|
|
|
static void
|
|
|
set_cap_sys_admin(void)
|
|
|
{
|
|
|
cap_t caps;
|
|
|
if (!(caps = cap_get_proc()))
|
|
|
die("Cannot get capabilities: %m");
|
|
|
|
|
|
cap_value_t cap_list[] = { CAP_SYS_ADMIN };
|
|
|
if (cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_SET) < 0)
|
|
|
die("Cannot modify capabilities");
|
|
|
|
|
|
if (cap_set_proc(caps) < 0)
|
|
|
die("Cannot set capabilities: %m");
|
|
|
|
|
|
cap_free(caps);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
apply_dir_rules(int with_defaults)
|
|
|
{
|
|
|
/*
|
|
|
* Before mounting anything, we create all mount points inside the box.
|
|
|
* This is necessary to avoid bypassing directory permissions. If you
|
|
|
* want nested binds, you have to create the mount points explicitly.
|
|
|
*/
|
|
|
for (struct dir_rule *r = first_dir_rule; r; r=r->next)
|
|
|
{
|
|
|
if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT))
|
|
|
continue;
|
|
|
|
|
|
char *in = r->inside;
|
|
|
char *out = r->outside;
|
|
|
|
|
|
if (!out)
|
|
|
{
|
|
|
msg("Not binding anything on %s\n", in);
|
|
|
r->flags |= DIR_FLAG_DISABLED;
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out))
|
|
|
{
|
|
|
msg("Not binding %s on %s (does not exist)\n", out, r->inside);
|
|
|
r->flags |= DIR_FLAG_DISABLED;
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
char root_in[1024];
|
|
|
snprintf(root_in, sizeof(root_in), "root/%s", in);
|
|
|
make_dir(root_in);
|
|
|
}
|
|
|
|
|
|
for (struct dir_rule *r = first_dir_rule; r; r=r->next)
|
|
|
{
|
|
|
if (r->flags & DIR_FLAG_DISABLED)
|
|
|
continue;
|
|
|
if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT))
|
|
|
continue;
|
|
|
|
|
|
char *in = r->inside;
|
|
|
char *out = r->outside;
|
|
|
char root_in[1024];
|
|
|
snprintf(root_in, sizeof(root_in), "root/%s", in);
|
|
|
|
|
|
unsigned long mount_flags = 0;
|
|
|
if (!(r->flags & DIR_FLAG_RW))
|
|
|
mount_flags |= MS_RDONLY;
|
|
|
if (r->flags & DIR_FLAG_NOEXEC)
|
|
|
mount_flags |= MS_NOEXEC;
|
|
|
if (!(r->flags & DIR_FLAG_DEV))
|
|
|
mount_flags |= MS_NODEV;
|
|
|
|
|
|
if (r->flags & DIR_FLAG_FS)
|
|
|
{
|
|
|
msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags);
|
|
|
if (mount("none", root_in, out, mount_flags, "") < 0)
|
|
|
die("Cannot mount %s on %s: %m", out, in);
|
|
|
if (!strcmp(in, "proc"))
|
|
|
{
|
|
|
// If we are mounting procfs, add hidepid=2, so that only the processes
|
|
|
// of the same user are visible. This has to be done as a remount.
|
|
|
if (mount("none", root_in, out, MS_REMOUNT | mount_flags, "hidepid=2") < 0)
|
|
|
die("Cannot re-mount proc with hidepid option: %m");
|
|
|
}
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
mount_flags |= MS_BIND | MS_NOSUID;
|
|
|
msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags);
|
|
|
|
|
|
/*
|
|
|
* This is tricky. We cannot run mount() with root privileges, since
|
|
|
* it could be used to bypass access control if the mounted path
|
|
|
* contains elements inaccessible to the user running isolate.
|
|
|
*
|
|
|
* We switch effective UID and GID back to the calling user (which clears
|
|
|
* all capabilities, but keeps them in the permitted set) and then
|
|
|
* enable CAP_SYS_ADMIN. So we have CAP_SYS_ADMIN (needed for mount),
|
|
|
* but not CAP_DAC_OVERRIDE (which allows to bypass permission checks).
|
|
|
*/
|
|
|
|
|
|
if (setresuid(orig_uid, orig_uid, 0) < 0 ||
|
|
|
setresgid(orig_gid, orig_gid, 0) < 0)
|
|
|
die("Cannot switch UID and GID: %m");
|
|
|
|
|
|
set_cap_sys_admin();
|
|
|
|
|
|
// Most mount flags need remount to work
|
|
|
if (mount(out, root_in, "none", mount_flags, "") < 0 ||
|
|
|
mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0)
|
|
|
die("Cannot mount %s on %s: %m", out, in);
|
|
|
|
|
|
if (setresuid(orig_uid, 0, orig_uid) < 0 ||
|
|
|
setresgid(orig_gid, 0, orig_gid) < 0)
|
|
|
die("Cannot switch UID and GID: %m");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/*** Disk quotas ***/
|
|
|
|
|
|
/*
 * Check whether "with" is a prefix of "path" which ends at a boundary
 * between path components.
 *
 * The match is accepted if, after the prefix, the path either ends or
 * continues with '/', or if the prefix itself ends with '/' (as the root
 * directory "/" does). So "/dev" matches "/dev" and "/dev/sda1", but not
 * "/devil". An empty prefix matches everything.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
path_begins_with(char *path, char *with)
{
  char *prefix_start = with;

  // All characters of the prefix must be present at the start of the path
  while (*with)
    if (*path++ != *with++)
      return 0;

  // The rest of the path must start a new component (or be empty) ...
  if (!*path || *path == '/')
    return 1;

  // ... unless the prefix was empty or already ended with a slash
  return (with == prefix_start || with[-1] == '/');
}
|
|
|
|
|
|
/*
 * Find the device holding the filesystem which contains the given path.
 *
 * Scans /proc/mounts for entries whose device name passes
 * path_begins_with(..., "/dev") and whose mount point passes
 * path_begins_with(path, ...), and picks the one with the longest mount point.
 *
 * Returns a newly allocated copy of the device name (to be freed by the
 * caller), or NULL if no entry matches. Calls die() if /proc/mounts
 * cannot be opened.
 */
static char *
find_device(char *path)
{
  FILE *mounts = setmntent("/proc/mounts", "r");
  if (!mounts)
    die("Cannot open /proc/mounts: %m");

  char *best_dev = NULL;
  int best_len = 0;
  struct mntent *me;

  while ((me = getmntent(mounts)) != NULL)
    {
      // Only filesystems on devices which contain our path are interesting
      if (!path_begins_with(me->mnt_fsname, "/dev") ||
	  !path_begins_with(path, me->mnt_dir))
	continue;

      // Keep the candidate only if its mount point is strictly longer
      int len = strlen(me->mnt_dir);
      if (len <= best_len)
	continue;

      best_len = len;
      free(best_dev);
      best_dev = xstrdup(me->mnt_fsname);
    }

  endmntent(mounts);
  return best_dev;
}
|
|
|
|
|
|
void
|
|
|
set_quota(void)
|
|
|
{
|
|
|
if (!block_quota)
|
|
|
return;
|
|
|
|
|
|
char cwd[PATH_MAX];
|
|
|
if (!getcwd(cwd, sizeof(cwd)))
|
|
|
die("getcwd: %m");
|
|
|
|
|
|
char *dev = find_device(cwd);
|
|
|
if (!dev)
|
|
|
die("Cannot identify filesystem which contains %s", cwd);
|
|
|
msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev);
|
|
|
|
|
|
// Sanity check
|
|
|
struct stat dev_st, cwd_st;
|
|
|
if (stat(dev, &dev_st) < 0)
|
|
|
die("Cannot identify block device %s: %m", dev);
|
|
|
if (!S_ISBLK(dev_st.st_mode))
|
|
|
die("Expected that %s is a block device", dev);
|
|
|
if (stat(".", &cwd_st) < 0)
|
|
|
die("Cannot stat cwd: %m");
|
|
|
if (cwd_st.st_dev != dev_st.st_rdev)
|
|
|
die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev);
|
|
|
|
|
|
struct dqblk dq = {
|
|
|
.dqb_bhardlimit = block_quota,
|
|
|
.dqb_bsoftlimit = block_quota,
|
|
|
.dqb_ihardlimit = inode_quota,
|
|
|
.dqb_isoftlimit = inode_quota,
|
|
|
.dqb_valid = QIF_LIMITS,
|
|
|
};
|
|
|
if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &dq) < 0)
|
|
|
die("Cannot set disk quota: %m");
|
|
|
msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota);
|
|
|
|
|
|
free(dev);
|
|
|
}
|
|
|
|