Show More
Commit Description:
- add isolate...
Commit Description:
- add isolate - more comment and output for each script
File last commit:
Show/Diff file:
Action:
isolate/rules.c | 509 lines | 11.5 KiB | text/x-c | CLexer |
/*
* Process Isolator -- Rules
*
* (c) 2012-2018 Martin Mares <mj@ucw.cz>
* (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au>
*/
#include "isolate.h"
#include <limits.h>
#include <mntent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/capability.h>
#include <sys/mount.h>
#include <sys/quota.h>
#include <sys/stat.h>
#include <sys/vfs.h>
#include <unistd.h>
/*** Environment rules ***/
/*
 * One environment rule. Rules form a singly linked list and are applied
 * in list order by setup_environment().
 */
struct env_rule {
char *var; // Name of the variable this rule applies to
char *val; // Value to set; ""=remove the variable, NULL=inherit it from the caller's environment
int var_len; // strlen(var); (re)computed by setup_environment() before the rules are applied
struct env_rule *next; // Next rule in the list, NULL at the end
};
// Head of the rule list and the link where the next user rule gets appended
static struct env_rule *first_env_rule;
static struct env_rule **last_env_rule = &first_env_rule;
// Built-in rules; setup_environment() links them in front of the user rules
static struct env_rule default_env_rules[] = {
{ .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 },
};
/*
 * Register an environment rule given as "VAR" (inherit the variable from the
 * caller's environment), "VAR=" (remove the variable) or "VAR=value" (set it).
 *
 * The rule keeps a private copy of a0, so the caller's string is not
 * referenced after the call. Returns 1 on success, 0 if the variable name
 * is empty (the string starts with '=').
 */
int
set_env_action(char *a0)
{
  // Reject an empty variable name before allocating, so that nothing leaks
  if (a0[0] == '=')
    return 0;

  // The rule and its copy of the string live in a single allocation
  size_t len = strlen(a0);
  struct env_rule *r = xmalloc(sizeof(*r) + len + 1);
  char *a = (char *)(r+1);
  memcpy(a, a0, len + 1);

  // Split "VAR=value" in place; without '=' the value is inherited
  r->var = a;
  char *sep = strchr(a, '=');
  if (sep)
    {
      *sep++ = 0;
      r->val = sep;
    }
  else
    r->val = NULL;

  // setup_environment() recomputes this, but do not leave the field uninitialized
  r->var_len = strlen(r->var);

  // Append to the end of the rule list
  r->next = NULL;
  *last_env_rule = r;
  last_env_rule = &r->next;
  return 1;
}
/*
 * Tell whether env_entry (a "NAME=value" string) defines the variable
 * named by rule r. Relies on r->var_len holding the length of r->var.
 * Returns 1 on a match, 0 otherwise.
 */
static int
match_env_var(char *env_entry, struct env_rule *r)
{
  int name_len = r->var_len;

  // The name must match in full and be followed directly by '='
  return strncmp(env_entry, r->var, name_len) == 0 && env_entry[name_len] == '=';
}
/*
 * Apply a single rule to the environment array under construction.
 *
 * env is a NULL-terminated array currently holding *env_sizep entries; it
 * must have room for one more entry plus the terminator. Any existing
 * definition of the rule's variable is dropped, then the new definition
 * (if the rule produces one) is appended and *env_sizep is updated.
 */
static void
apply_env_rule(char **env, int *env_sizep, struct env_rule *r)
{
  int count = *env_sizep;

  // Drop the first existing definition by moving the last entry into its slot
  for (int i = 0; i < count; i++)
    if (match_env_var(env[i], r))
      {
        count--;
        env[i] = env[count];
        env[count] = NULL;
        break;
      }
  *env_sizep = count;

  char *entry;
  if (!r->val)
    {
      // Inherit: reuse the string from the original environment, if present
      char **src = environ;
      while (*src && !match_env_var(*src, r))
        src++;
      entry = *src;
      if (!entry)
        return;
    }
  else if (!r->val[0])
    {
      // An empty value means the variable is only removed
      return;
    }
  else
    {
      // Explicit value: build a fresh "VAR=value" string
      entry = xmalloc(r->var_len + 1 + strlen(r->val) + 1);
      sprintf(entry, "%s=%s", r->var, r->val);
    }

  // Append the entry and keep the array NULL-terminated
  env[count++] = entry;
  env[count] = NULL;
  *env_sizep = count;
}
/*
 * Build the environment to pass on: start from a copy of the caller's
 * environment (if pass_environ is set) or from an empty one, then apply
 * the built-in rules followed by the user rules.
 *
 * Returns a newly allocated NULL-terminated array. With verbose > 1, the
 * resulting environment is printed to stderr.
 */
char **
setup_environment(void)
{
  // Put the built-in rules in front of the user rules, preserving their order
  int idx = ARRAY_SIZE(default_env_rules);
  while (idx-- > 0)
    {
      struct env_rule *builtin = &default_env_rules[idx];
      builtin->next = first_env_rule;
      first_env_rule = builtin;
    }

  // Count entries of the original environment
  int inherited = 0;
  for (char **e = environ; *e; e++)
    inherited++;

  // Count the rules and cache the length of each variable name
  int rule_count = 0;
  struct env_rule *r;
  for (r = first_env_rule; r; r = r->next)
    {
      r->var_len = strlen(r->var);
      rule_count++;
    }

  // Every rule adds at most one entry, so this is always large enough
  char **env = xmalloc((inherited + rule_count + 1) * sizeof(char *));
  int size = 0;
  if (pass_environ)
    {
      memcpy(env, environ, inherited * sizeof(char *));
      size = inherited;
    }
  env[size] = NULL;

  // Apply the rules in list order
  for (r = first_env_rule; r; r = r->next)
    apply_env_rule(env, &size, r);

  // Report the result when asked to be chatty
  if (verbose > 1)
    {
      fprintf(stderr, "Passing environment:\n");
      for (char **e = env; *e; e++)
        fprintf(stderr, "\t%s\n", *e);
    }

  return env;
}
/*** Directory rules ***/
/*
 * One directory rule: what to make visible at "inside" within the box.
 * Rules form a singly linked list in the order they were first added.
 */
struct dir_rule {
char *inside; // A relative path (leading slashes stripped, no ".." components)
char *outside; // An absolute path or a relative path starting with "./"; a filesystem type for DIR_FLAG_FS rules; NULL = bind nothing
unsigned int flags; // DIR_FLAG_xxx
struct dir_rule *next; // Next rule in the list, NULL at the end
};
/*
 * Flags of a directory rule. The first five can be given by name as rule
 * options; their bit positions must match the order of dir_flag_names[].
 */
enum dir_rule_flags {
DIR_FLAG_RW = 1, // Without it, the mount is read-only
DIR_FLAG_NOEXEC = 2, // Mount with MS_NOEXEC
DIR_FLAG_FS = 4, // Mount a filesystem of type "outside" instead of binding a directory
DIR_FLAG_MAYBE = 8, // Skip the rule if "outside" does not exist
DIR_FLAG_DEV = 16, // Without it, the mount gets MS_NODEV
DIR_FLAG_DEFAULT = 1U << 15, // Used internally: rule created by init_dir_rules()
DIR_FLAG_DISABLED = 1U << 16, // Used internally: rule skipped by apply_dir_rules()
};
// Option names accepted in rule specifications; entry i stands for flag bit (1U << i)
static const char * const dir_flag_names[] = { "rw", "noexec", "fs", "maybe", "dev" };
// Head of the rule list and the link where the next new rule gets appended
static struct dir_rule *first_dir_rule;
static struct dir_rule **last_dir_rule = &first_dir_rule;
/*
 * Validate a path which is supposed to stay inside the box.
 *
 * Leading slashes are skipped. Returns a pointer into the original string
 * past those slashes, or NULL if nothing is left or if any component of
 * the path is "..".
 */
static char *
sanitize_dir_path(char *path)
{
  // Skip all leading slashes; an empty remainder is not a usable path
  path += strspn(path, "/");
  if (*path == '\0')
    return NULL;

  // Walk the components one by one and refuse ".."
  for (char *comp = path; *comp; )
    {
      size_t comp_len = strcspn(comp, "/");
      if (comp_len == 2 && comp[0] == '.' && comp[1] == '.')
        return NULL;

      // Move past the component and the slash following it, if any
      comp += comp_len;
      if (*comp == '/')
        comp++;
    }

  return path;
}
/*
 * Add a directory rule, or replace the rule already defined for the same
 * inside path.
 *
 * in    - path inside the box; checked by sanitize_dir_path()
 * out   - for DIR_FLAG_FS rules a filesystem type (required, must not start
 *         with '/'); otherwise NULL, an absolute path, or a path starting
 *         with "./"
 * flags - DIR_FLAG_xxx
 *
 * The strings are not copied; the rule keeps the pointers it is given.
 * Returns 1 on success, 0 if either path is not acceptable.
 */
static int
add_dir_rule(char *in, char *out, unsigned int flags)
{
  // The inside path must not be able to escape the box
  in = sanitize_dir_path(in);
  if (in == NULL)
    return 0;

  // Validate the outside part according to the kind of rule
  if (flags & DIR_FLAG_FS)
    {
      if (out == NULL || *out == '/')
        return 0;
    }
  else if (out != NULL && *out != '/' && strncmp(out, "./", 2) != 0)
    return 0;

  // Look for an existing rule on the same inside path
  struct dir_rule *rule = first_dir_rule;
  while (rule && strcmp(rule->inside, in) != 0)
    rule = rule->next;

  // None found: append a fresh rule to the list
  if (rule == NULL)
    {
      rule = xmalloc(sizeof(*rule));
      rule->inside = in;
      rule->next = NULL;
      *last_dir_rule = rule;
      last_dir_rule = &rule->next;
    }

  // Either way, the rule now gets the new target and flags
  rule->outside = out;
  rule->flags = flags;
  return 1;
}
/*
 * Translate an option name to its DIR_FLAG_xxx bit using dir_flag_names[].
 * An unknown name is reported via die().
 */
static unsigned int
parse_dir_option(char *opt)
{
  unsigned int bit = 0;

  while (bit < ARRAY_SIZE(dir_flag_names))
    {
      // The position in the table is the bit number of the flag
      if (strcmp(opt, dir_flag_names[bit]) == 0)
        return 1U << bit;
      bit++;
    }

  die("Unknown directory option %s", opt);
}
/*
 * Parse a rule specification of the form "in[=out][:option]..." and
 * register it via add_dir_rule().
 *
 * Works on a private copy of arg. ext_flags are OR-ed with the flags of
 * the parsed options. "in=" (an empty out) passes NULL as the outside
 * path; a specification without '=' uses "/in" as the outside path.
 * Returns whatever add_dir_rule() returns.
 */
static int
set_dir_action_ext(char *arg, unsigned int ext_flags)
{
  char *spec = xstrdup(arg);
  unsigned int flags = ext_flags;

  // Cut off the ":option" suffixes one by one and collect their flags
  for (char *opt = strchr(spec, ':'); opt; )
    {
      *opt++ = 0;
      char *following = strchr(opt, ':');
      if (following)
        *following = 0;
      flags |= parse_dir_option(opt);
      opt = following;
    }

  char *eq = strchr(spec, '=');
  if (!eq)
    {
      // No explicit outside path: use the same path relative to the root
      size_t in_len = strlen(spec);
      char *out = xmalloc(1 + in_len + 1);
      out[0] = '/';
      memcpy(out + 1, spec, in_len + 1);
      return add_dir_rule(spec, out, flags);
    }

  // Split at '='
  *eq++ = 0;
  char *out = *eq ? eq : NULL;
  return add_dir_rule(spec, out, flags);
}
/*
 * Register a directory rule from the specification in arg, with no
 * internal flags preset. Returns the result of set_dir_action_ext().
 */
int
set_dir_action(char *arg)
{
  const unsigned int no_ext_flags = 0;

  return set_dir_action_ext(arg, no_ext_flags);
}
/*
 * Register a built-in directory rule: same as set_dir_action(), but the
 * rule is additionally marked with DIR_FLAG_DEFAULT.
 */
static int
set_dir_action_default(char *arg)
{
  const unsigned int ext_flags = DIR_FLAG_DEFAULT;

  return set_dir_action_ext(arg, ext_flags);
}
/*
 * Register the built-in directory rules. Each of them is marked with
 * DIR_FLAG_DEFAULT by set_dir_action_default().
 */
void
init_dir_rules(void)
{
  // Specifications of the built-in rules, in registration order
  static char * const builtin_rules[] = {
    "box=./box:rw",
    "bin",
    "dev:dev",
    "lib",
    "lib64:maybe",
    "proc=proc:fs",
    "usr",
  };

  for (unsigned int i = 0; i < sizeof(builtin_rules) / sizeof(builtin_rules[0]); i++)
    set_dir_action_default(builtin_rules[i]);
}
/*
 * Raise CAP_SYS_ADMIN in the effective capability set of the current
 * process. Any failure is reported via die().
 */
static void
set_cap_sys_admin(void)
{
  // Start from the capability state the process currently has
  cap_t state = cap_get_proc();
  if (!state)
    die("Cannot get capabilities: %m");

  // Turn on the single capability we need
  cap_value_t wanted[] = { CAP_SYS_ADMIN };
  int rc = cap_set_flag(state, CAP_EFFECTIVE, 1, wanted, CAP_SET);
  if (rc < 0)
    die("Cannot modify capabilities");

  // Install the modified state and release our copy of it
  rc = cap_set_proc(state);
  if (rc < 0)
    die("Cannot set capabilities: %m");

  cap_free(state);
}
/*
 * Carry out all directory rules: create the mount points under "root/"
 * and then mount or bind the requested directories and filesystems there.
 *
 * If with_defaults is zero, rules marked with DIR_FLAG_DEFAULT are skipped.
 * Rules which end up not being applied (no outside path, or a missing
 * directory of a "maybe" rule) are marked with DIR_FLAG_DISABLED.
 * Any failure is reported via die().
 */
void
apply_dir_rules(int with_defaults)
{
/*
* Before mounting anything, we create all mount points inside the box.
* This is necessary to avoid bypassing directory permissions. If you
* want nested binds, you have to create the mount points explicitly.
*/
for (struct dir_rule *r = first_dir_rule; r; r=r->next)
{
if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT))
continue;
char *in = r->inside;
char *out = r->outside;
// A rule without an outside path only cancels a previous rule for this path
if (!out)
{
msg("Not binding anything on %s\n", in);
r->flags |= DIR_FLAG_DISABLED;
continue;
}
// Optional rules are silently dropped when the outside directory is missing
if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out))
{
msg("Not binding %s on %s (does not exist)\n", out, r->inside);
r->flags |= DIR_FLAG_DISABLED;
continue;
}
// NOTE(review): snprintf() silently truncates paths which do not fit in root_in
char root_in[1024];
snprintf(root_in, sizeof(root_in), "root/%s", in);
make_dir(root_in);
}
// Second pass: perform the mounts for all rules which were not disabled above
for (struct dir_rule *r = first_dir_rule; r; r=r->next)
{
if (r->flags & DIR_FLAG_DISABLED)
continue;
if (!with_defaults && (r->flags & DIR_FLAG_DEFAULT))
continue;
char *in = r->inside;
char *out = r->outside;
char root_in[1024];
snprintf(root_in, sizeof(root_in), "root/%s", in);
// Mounts are read-only and without device nodes unless the rule says otherwise
unsigned long mount_flags = 0;
if (!(r->flags & DIR_FLAG_RW))
mount_flags |= MS_RDONLY;
if (r->flags & DIR_FLAG_NOEXEC)
mount_flags |= MS_NOEXEC;
if (!(r->flags & DIR_FLAG_DEV))
mount_flags |= MS_NODEV;
if (r->flags & DIR_FLAG_FS)
{
// For filesystem rules, "out" is the filesystem type
msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags);
if (mount("none", root_in, out, mount_flags, "") < 0)
die("Cannot mount %s on %s: %m", out, in);
if (!strcmp(in, "proc"))
{
// If we are mounting procfs, add hidepid=2, so that only the processes
// of the same user are visible. This has to be done as a remount.
if (mount("none", root_in, out, MS_REMOUNT | mount_flags, "hidepid=2") < 0)
die("Cannot re-mount proc with hidepid option: %m");
}
}
else
{
mount_flags |= MS_BIND | MS_NOSUID;
msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags);
/*
* This is tricky. We cannot run mount() with root privileges, since
* it could be used to bypass access control if the mounted path
* contains elements inaccessible to the user running isolate.
*
* We switch effective UID and GID back to the calling user (which clears
* all capabilities, but keeps them in the permitted set) and then
* enable CAP_SYS_ADMIN. So we have CAP_SYS_ADMIN (needed for mount),
* but not CAP_DAC_OVERRIDE (which allows to bypass permission checks).
*/
if (setresuid(orig_uid, orig_uid, 0) < 0 ||
setresgid(orig_gid, orig_gid, 0) < 0)
die("Cannot switch UID and GID: %m");
set_cap_sys_admin();
// Most mount flags need remount to work
if (mount(out, root_in, "none", mount_flags, "") < 0 ||
mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0)
die("Cannot mount %s on %s: %m", out, in);
// Make UID and GID 0 effective again; the caller's IDs stay as the real and saved ones
if (setresuid(orig_uid, 0, orig_uid) < 0 ||
setresgid(orig_gid, 0, orig_gid) < 0)
die("Cannot switch UID and GID: %m");
}
}
}
/*** Disk quotas ***/
/*
 * Check whether "path" lies at or below the directory "with", comparing
 * whole path components: "/dev/sda1" begins with "/dev", but "/devices"
 * does not.
 *
 * A prefix ending with a slash (in particular the root directory "/")
 * matches everything which starts with it; an empty prefix matches any
 * path. Returns 1 on a match, 0 otherwise.
 */
static int
path_begins_with(const char *path, const char *with)
{
  size_t with_len = strlen(with);

  // The prefix must match character by character first
  if (strncmp(path, with, with_len) != 0)
    return 0;

  // Nothing more to check for an empty prefix or one ending at a slash
  if (with_len == 0 || with[with_len - 1] == '/')
    return 1;

  // Otherwise the prefix must end exactly at a component boundary of the path
  return path[with_len] == '\0' || path[with_len] == '/';
}
/*
 * Find the device holding the filesystem which contains "path".
 *
 * Scans /proc/mounts for mounts whose source is under /dev and whose
 * mount point is a prefix of "path", and picks the one with the longest
 * mount point. Returns a newly allocated copy of the device name (to be
 * freed by the caller), or NULL if no such mount exists.
 */
static char *
find_device(char *path)
{
  FILE *mounts = setmntent("/proc/mounts", "r");
  if (!mounts)
    die("Cannot open /proc/mounts: %m");

  char *best_dev = NULL;
  int best_len = 0;

  for (struct mntent *ent = getmntent(mounts); ent; ent = getmntent(mounts))
    {
      // Only mounts backed by a device are interesting
      if (!path_begins_with(ent->mnt_fsname, "/dev"))
        continue;
      if (!path_begins_with(path, ent->mnt_dir))
        continue;

      // Prefer the most specific (longest) mount point seen so far
      int dir_len = strlen(ent->mnt_dir);
      if (dir_len <= best_len)
        continue;

      best_len = dir_len;
      free(best_dev);
      best_dev = xstrdup(ent->mnt_fsname);
    }

  endmntent(mounts);
  return best_dev;
}
/*
 * Set the disk quota of user box_uid on the filesystem containing the
 * current directory. Both the soft and the hard limits are set to
 * block_quota (blocks) and inode_quota (inodes).
 *
 * Does nothing when block_quota is zero. Any failure is reported via die().
 */
void
set_quota(void)
{
  if (!block_quota)
    return;

  // Figure out which device holds the current directory
  char cwd[PATH_MAX];
  if (getcwd(cwd, sizeof(cwd)) == NULL)
    die("getcwd: %m");

  char *dev = find_device(cwd);
  if (dev == NULL)
    die("Cannot identify filesystem which contains %s", cwd);
  msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev);

  // Sanity check: it must be a block device ...
  struct stat dev_st;
  if (stat(dev, &dev_st) < 0)
    die("Cannot identify block device %s: %m", dev);
  if (!S_ISBLK(dev_st.st_mode))
    die("Expected that %s is a block device", dev);

  // ... and the very device the current directory resides on
  struct stat cwd_st;
  if (stat(".", &cwd_st) < 0)
    die("Cannot stat cwd: %m");
  if (cwd_st.st_dev != dev_st.st_rdev)
    die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev);

  // Only the limits are marked as valid; everything else stays zero
  struct dqblk limits;
  memset(&limits, 0, sizeof(limits));
  limits.dqb_bhardlimit = block_quota;
  limits.dqb_bsoftlimit = block_quota;
  limits.dqb_ihardlimit = inode_quota;
  limits.dqb_isoftlimit = inode_quota;
  limits.dqb_valid = QIF_LIMITS;

  if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &limits) < 0)
    die("Cannot set disk quota: %m");
  msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota);

  free(dev);
}