|
|
/*
|
|
|
* Process Isolator -- Control Groups
|
|
|
*
|
|
|
* (c) 2012-2016 Martin Mares <mj@ucw.cz>
|
|
|
* (c) 2012-2014 Bernard Blackham <bernard@blackham.com.au>
|
|
|
*/
|
|
|
|
|
|
#include "isolate.h"
|
|
|
|
|
|
#include <assert.h>
|
|
|
#include <errno.h>
|
|
|
#include <fcntl.h>
|
|
|
#include <stdio.h>
|
|
|
#include <stdlib.h>
|
|
|
#include <string.h>
|
|
|
#include <sys/stat.h>
|
|
|
#include <unistd.h>
|
|
|
|
|
|
/* Static description of one cgroup controller handled by this file. */
struct cg_controller_desc {
  /* Controller name; cg_makepath() uses it as a path component. */
  const char *name;
  /* Non-zero if failing to create or enter this controller's cgroup is tolerated. */
  int optional;
};
|
|
|
|
|
|
/*
 * Indices into cg_controllers[]. CG_NUM_CONTROLLERS is the number of real
 * controllers. CG_PARENT is not a controller: it is a flag bit OR-ed into
 * a controller index to make cg_makepath() address the parent cgroup
 * instead of the box's own one.
 */
typedef enum {
  CG_MEMORY = 0,
  CG_CPUACCT = 1,
  CG_CPUSET = 2,
  CG_NUM_CONTROLLERS = 3,
  CG_PARENT = 256,
} cg_controller;
|
|
|
|
|
|
static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = {
|
|
|
[CG_MEMORY] = { "memory", 0 },
|
|
|
[CG_CPUACCT] = { "cpuacct", 0 },
|
|
|
[CG_CPUSET] = { "cpuset", 1 },
|
|
|
[CG_NUM_CONTROLLERS] = { NULL, 0 },
|
|
|
};
|
|
|
|
|
|
/*
 * Loop header which declares a cg_controller variable named by the argument
 * and steps it through all real controllers (0 .. CG_NUM_CONTROLLERS-1).
 * The loop body follows the macro invocation.
 */
#define FOREACH_CG_CONTROLLER(_ctl) \
  for (cg_controller (_ctl) = 0; (_ctl) < CG_NUM_CONTROLLERS; (_ctl)++)
|
|
|
|
|
|
static const char *
|
|
|
cg_controller_name(cg_controller c)
|
|
|
{
|
|
|
assert(c < CG_NUM_CONTROLLERS);
|
|
|
return cg_controllers[c].name;
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
cg_controller_optional(cg_controller c)
|
|
|
{
|
|
|
assert(c < CG_NUM_CONTROLLERS);
|
|
|
return cg_controllers[c].optional;
|
|
|
}
|
|
|
|
|
|
/*
 * Names of the box's control group and of its parent group, as used by
 * cg_makepath() below the controller directory. Both are filled in by cg_init().
 */
static char cg_name[256];
static char cg_parent_name[256];

/* Size of the buffers holding attribute values read from or written to cgroup files. */
#define CG_BUFSIZE 1024
|
|
|
|
|
|
static void
|
|
|
cg_makepath(char *buf, size_t len, cg_controller c, const char *attr)
|
|
|
{
|
|
|
snprintf(buf, len, "%s/%s/%s/%s",
|
|
|
cf_cg_root,
|
|
|
cg_controller_name(c & ~CG_PARENT),
|
|
|
(c & CG_PARENT) ? cg_parent_name : cg_name,
|
|
|
attr);
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
cg_read(cg_controller controller, const char *attr, char *buf)
|
|
|
{
|
|
|
int result = 0;
|
|
|
int maybe = 0;
|
|
|
if (attr[0] == '?')
|
|
|
{
|
|
|
attr++;
|
|
|
maybe = 1;
|
|
|
}
|
|
|
|
|
|
char path[256];
|
|
|
cg_makepath(path, sizeof(path), controller, attr);
|
|
|
|
|
|
int fd = open(path, O_RDONLY);
|
|
|
if (fd < 0)
|
|
|
{
|
|
|
if (maybe)
|
|
|
goto fail;
|
|
|
die("Cannot read %s: %m", path);
|
|
|
}
|
|
|
|
|
|
int n = read(fd, buf, CG_BUFSIZE);
|
|
|
if (n < 0)
|
|
|
{
|
|
|
if (maybe)
|
|
|
goto fail_close;
|
|
|
die("Cannot read %s: %m", path);
|
|
|
}
|
|
|
if (n >= CG_BUFSIZE - 1)
|
|
|
die("Attribute %s too long", path);
|
|
|
if (n > 0 && buf[n-1] == '\n')
|
|
|
n--;
|
|
|
buf[n] = 0;
|
|
|
|
|
|
if (verbose > 1)
|
|
|
msg("CG: Read %s = <%s>\n", attr, buf);
|
|
|
|
|
|
result = 1;
|
|
|
fail_close:
|
|
|
close(fd);
|
|
|
fail:
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
static void __attribute__((format(printf,3,4)))
|
|
|
cg_write(cg_controller controller, const char *attr, const char *fmt, ...)
|
|
|
{
|
|
|
int maybe = 0;
|
|
|
if (attr[0] == '?')
|
|
|
{
|
|
|
attr++;
|
|
|
maybe = 1;
|
|
|
}
|
|
|
|
|
|
va_list args;
|
|
|
va_start(args, fmt);
|
|
|
|
|
|
char buf[CG_BUFSIZE];
|
|
|
int n = vsnprintf(buf, sizeof(buf), fmt, args);
|
|
|
if (n >= CG_BUFSIZE)
|
|
|
die("cg_write: Value for attribute %s is too long", attr);
|
|
|
|
|
|
if (verbose > 1)
|
|
|
msg("CG: Write %s = %s", attr, buf);
|
|
|
|
|
|
char path[256];
|
|
|
cg_makepath(path, sizeof(path), controller, attr);
|
|
|
|
|
|
int fd = open(path, O_WRONLY | O_TRUNC);
|
|
|
if (fd < 0)
|
|
|
{
|
|
|
if (maybe)
|
|
|
goto fail;
|
|
|
else
|
|
|
die("Cannot write %s: %m", path);
|
|
|
}
|
|
|
|
|
|
int written = write(fd, buf, n);
|
|
|
if (written < 0)
|
|
|
{
|
|
|
if (maybe)
|
|
|
goto fail_close;
|
|
|
else
|
|
|
die("Cannot set %s to %s: %m", path, buf);
|
|
|
}
|
|
|
if (written != n)
|
|
|
die("Short write to %s (%d out of %d bytes)", path, written, n);
|
|
|
|
|
|
fail_close:
|
|
|
close(fd);
|
|
|
fail:
|
|
|
va_end(args);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
cg_init(void)
|
|
|
{
|
|
|
if (!cg_enable)
|
|
|
return;
|
|
|
|
|
|
if (!dir_exists(cf_cg_root))
|
|
|
die("Control group filesystem at %s not mounted", cf_cg_root);
|
|
|
|
|
|
if (cf_cg_parent)
|
|
|
{
|
|
|
snprintf(cg_name, sizeof(cg_name), "%s/box-%d", cf_cg_parent, box_id);
|
|
|
snprintf(cg_parent_name, sizeof(cg_parent_name), "%s", cf_cg_parent);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
snprintf(cg_name, sizeof(cg_name), "box-%d", box_id);
|
|
|
strcpy(cg_parent_name, ".");
|
|
|
}
|
|
|
msg("Using control group %s under parent %s\n", cg_name, cg_parent_name);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
cg_prepare(void)
|
|
|
{
|
|
|
if (!cg_enable)
|
|
|
return;
|
|
|
|
|
|
struct stat st;
|
|
|
char buf[CG_BUFSIZE];
|
|
|
char path[256];
|
|
|
|
|
|
FOREACH_CG_CONTROLLER(controller)
|
|
|
{
|
|
|
cg_makepath(path, sizeof(path), controller, "");
|
|
|
if (stat(path, &st) >= 0 || errno != ENOENT)
|
|
|
{
|
|
|
msg("Control group %s already exists, trying to empty it.\n", path);
|
|
|
if (rmdir(path) < 0)
|
|
|
die("Failed to reset control group %s: %m", path);
|
|
|
}
|
|
|
|
|
|
if (mkdir(path, 0777) < 0 && !cg_controller_optional(controller))
|
|
|
die("Failed to create control group %s: %m", path);
|
|
|
}
|
|
|
|
|
|
// If the cpuset module is enabled, set up allowed cpus and memory nodes.
|
|
|
// If per-box configuration exists, use it; otherwise, inherit the settings
|
|
|
// from the parent cgroup.
|
|
|
struct cf_per_box *cf = cf_current_box();
|
|
|
if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.cpus", buf))
|
|
|
cg_write(CG_CPUSET, "cpuset.cpus", "%s", cf->cpus ? cf->cpus : buf);
|
|
|
if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.mems", buf))
|
|
|
cg_write(CG_CPUSET, "cpuset.mems", "%s", cf->mems ? cf->mems : buf);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
cg_enter(void)
|
|
|
{
|
|
|
if (!cg_enable)
|
|
|
return;
|
|
|
|
|
|
msg("Entering control group %s\n", cg_name);
|
|
|
|
|
|
FOREACH_CG_CONTROLLER(controller)
|
|
|
{
|
|
|
if (cg_controller_optional(controller))
|
|
|
cg_write(controller, "?tasks", "%d\n", (int) getpid());
|
|
|
else
|
|
|
cg_write(controller, "tasks", "%d\n", (int) getpid());
|
|
|
}
|
|
|
|
|
|
if (cg_memory_limit)
|
|
|
{
|
|
|
cg_write(CG_MEMORY, "memory.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10);
|
|
|
cg_write(CG_MEMORY, "?memory.memsw.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10);
|
|
|
cg_write(CG_MEMORY, "memory.max_usage_in_bytes", "0\n");
|
|
|
cg_write(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", "0\n");
|
|
|
}
|
|
|
|
|
|
if (cg_timing)
|
|
|
cg_write(CG_CPUACCT, "cpuacct.usage", "0\n");
|
|
|
}
|
|
|
|
|
|
int
|
|
|
cg_get_run_time_ms(void)
|
|
|
{
|
|
|
if (!cg_enable)
|
|
|
return 0;
|
|
|
|
|
|
char buf[CG_BUFSIZE];
|
|
|
cg_read(CG_CPUACCT, "cpuacct.usage", buf);
|
|
|
unsigned long long ns = atoll(buf);
|
|
|
return ns / 1000000;
|
|
|
}
|
|
|
|
|
|
void
|
|
|
cg_stats(void)
|
|
|
{
|
|
|
if (!cg_enable)
|
|
|
return;
|
|
|
|
|
|
char buf[CG_BUFSIZE];
|
|
|
|
|
|
// Memory usage statistics
|
|
|
unsigned long long mem=0, memsw=0;
|
|
|
if (cg_read(CG_MEMORY, "?memory.max_usage_in_bytes", buf))
|
|
|
mem = atoll(buf);
|
|
|
if (cg_read(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", buf))
|
|
|
{
|
|
|
memsw = atoll(buf);
|
|
|
if (memsw > mem)
|
|
|
mem = memsw;
|
|
|
}
|
|
|
if (mem)
|
|
|
meta_printf("cg-mem:%lld\n", mem >> 10);
|
|
|
|
|
|
// OOM kill detection
|
|
|
if (cg_read(CG_MEMORY, "?memory.oom_control", buf))
|
|
|
{
|
|
|
int oom_killed = 0;
|
|
|
char *s = buf;
|
|
|
while (s)
|
|
|
{
|
|
|
if (sscanf(s, "oom_kill %d", &oom_killed) == 1 && oom_killed)
|
|
|
{
|
|
|
meta_printf("cg-oom-killed:1\n");
|
|
|
break;
|
|
|
}
|
|
|
s = strchr(s, '\n');
|
|
|
if (s)
|
|
|
s++;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void
|
|
|
cg_remove(void)
|
|
|
{
|
|
|
char buf[CG_BUFSIZE];
|
|
|
|
|
|
if (!cg_enable)
|
|
|
return;
|
|
|
|
|
|
FOREACH_CG_CONTROLLER(controller)
|
|
|
{
|
|
|
// The cgroup can be non-existent at this moment (e.g., --cleanup before the first --init)
|
|
|
if (!cg_read(controller, "?tasks", buf))
|
|
|
continue;
|
|
|
|
|
|
if (buf[0])
|
|
|
die("Some tasks left in controller %s of cgroup %s, failed to remove it",
|
|
|
cg_controller_name(controller), cg_name);
|
|
|
|
|
|
char path[256];
|
|
|
cg_makepath(path, sizeof(path), controller, "");
|
|
|
|
|
|
if (rmdir(path) < 0)
|
|
|
die("Cannot remove control group %s: %m", path);
|
|
|
}
|
|
|
}
|
|
|
|