1 /* SPDX-License-Identifier: GPL-2.0-only */
10 #include <sys/sysmacros.h>
13 #include <sys/socket.h>
14 #include <sys/capability.h>
15 #include <sys/syscall.h>
17 #include "micoforia.lsg.h"
/*
 * Parse results of the lopsub option parser. lpr is queried by
 * handle_version_and_help(); sublpr is not referenced in the code shown here
 * (presumably it holds the result for the selected subcommand -- confirm).
 */
19 static struct lls_parse_result *lpr, *sublpr;
/* Threshold against which m7a_log() compares the level of each message. */
20 unsigned loglevel_arg_val = 4;
/* Array of network interface specs, each with a bridge name and a hwaddr. */
33 struct ifspec *ifspec;
34 /* this is never zero, even if no ifspec was given */
/* Number of device access control entries stored for this container. */
37 unsigned num_dac_entries;
/* Number of io.max entries stored for this container. */
39 unsigned num_io_max_entries;
40 /* ~0U: not given, 0: unlimited */
/* In gigabytes: apply_memory_limit() scales the value by 1024^3. */
42 unsigned memory_limit;
/* Number of elements of the capdrop array. */
46 unsigned num_capdrops;
/*
 * NULL-terminated array of pointers to all known containers. It is grown one
 * element at a time by get_or_append_container().
 */
51 static struct container **container;
/* Number of valid entries of the array above, excluding the NULL terminator. */
52 static unsigned num_containers;
/* State of a container being started; instantiated in exec_container(). */
54 struct container_runtime {
/*
 * pipe1 transports the pid of the init process from the child to the parent,
 * pipe2 transports a single byte from the parent to the init process.
 */
55 int pipe1[2], pipe2[2]; /* for startup communication */
/* Arrays of file descriptors with one element per forwarded tty. */
58 int *master, *slave, *client;
60 int init_pid; /* in the parent namespace */
/*
 * root: root directory of the container, dev: "<root>/dev",
 * pts: result of resolving /proc/self/fd/0 with realpath().
 */
61 char *pts, *root, *dev;
/*
 * Global fallback settings, filled in by check_options(). The get_*() helpers
 * return them for containers which have no per-container value.
 */
65 static char **default_dacl, **default_io_max;
66 unsigned num_default_dac_entries, num_default_io_max_entries;
/* Capabilities to drop from the bounding set if the container lists none. */
67 static cap_value_t *default_capdrop;
68 unsigned num_default_capdrops;
/* Minor numbers of the ttys to forward if the container lists none. */
69 uint32_t *default_tty;
70 unsigned num_default_ttys;
/* The subcommand in effect; m7a_log() compares it with the start command. */
71 static const struct lls_command *subcmd;
72 /* does not allocate memory */
/*
 * Write a printf-style message to stderr.
 *
 * ll is the severity of the message, which is compared against
 * loglevel_arg_val. fmt and the variable arguments are handed to vfprintf().
 * For the start subcommand a time stamp with milliseconds is printed first.
 */
73 void m7a_log(int ll, const char* fmt,...)
77 if (ll < loglevel_arg_val)
80 if (subcmd == lls_cmd(LSG_MICOFORIA_CMD_START, micoforia_suite)) {
/*
 * The call must not live inside assert(): if NDEBUG is defined, the argument
 * of assert() is not evaluated and t would be read uninitialized below.
 */
84 int gettime_ret = clock_gettime(CLOCK_REALTIME, &t);
assert(gettime_ret == 0);
(void)gettime_ret; /* avoid an unused variable warning with NDEBUG */
85 tm = localtime(&t.tv_sec);
86 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
87 fprintf(stderr, "%s:%04lu ", str,
88 (long unsigned)t.tv_nsec / 1000 / 1000);
/* Tag the message with the pid of the logging process. */
89 fprintf(stderr, "(%u) ", (unsigned)getpid());
91 vfprintf(stderr, fmt, argp);
/*
 * Log the error message which corresponds to a lopsub error code.
 *
 * lopsub_ret is negated before it is handed to lls_strerror(). errctx points
 * to the error context string of the failed lopsub call; one of the two log
 * statements below prints it in front of the message.
 * NOTE(review): the name suggests that the process is terminated afterwards,
 * but that part of the function is not shown here -- confirm.
 */
95 static void die_lopsub(int lopsub_ret, char **errctx)
97 const char *m = lls_strerror(-lopsub_ret);
99 ERROR_LOG("%s: %s\n", *errctx, m);
101 ERROR_LOG("%s\n", m);
/*
 * Iterate over the global container array. The macro declares its own cursor
 * _cp and assigns the element under the cursor to _c after each step, so _c
 * must be a struct container pointer declared by the caller.
 */
107 #define FOR_EACH_CONTAINER(_c) for ( \
108 struct container **_cp = container; \
110 (_cp)++, (_c) = *(_cp) \
/*
 * Search the container array for an entry whose name equals the given name
 * (exact strcmp() match).
 * NOTE(review): the return statements are not shown; presumably the match is
 * returned, or NULL if there is none -- confirm.
 */
113 static struct container *get_container(const char *name)
116 FOR_EACH_CONTAINER(c) {
117 if (!strcmp(c->name, name))
/*
 * Look up the container with the given name and create it if necessary.
 *
 * A new container is zero-initialized, receives a private copy of the name and
 * is appended to the global array, which stays NULL-terminated.
 */
123 static struct container *get_or_append_container(const char *name)
125 struct container *c = get_container(name);
/* Make room for one more pointer plus the terminating NULL pointer. */
128 container = xrealloc(container,
129 (++num_containers + 1) * sizeof(struct container *));
130 c = container[num_containers - 1] = xzmalloc(sizeof(struct container));
131 c->name = xstrdup(name);
132 /* ~0U means: not given */
134 c->memory_limit = ~0U;
136 container[num_containers] = NULL;
/*
 * Determine the tty minor numbers to forward for a container.
 *
 * The container's own list is checked first, then the global default list.
 * The static variable dflt (minor 1) is presumably the fallback if both are
 * empty -- the corresponding lines are not shown. The array is returned via
 * result, the return value is the number of elements.
 */
140 static unsigned get_container_ttys(const struct container *c, uint32_t **result)
142 static uint32_t dflt = {1};
143 if (c->num_ttys > 0) {
147 if (num_default_ttys > 0) {
148 *result = default_tty;
149 return num_default_ttys;
/*
 * Indices into the clo_given_counter[] array. parse_options() stores there how
 * many times each of these options was given before the config file is merged
 * in; check_options() uses the counts as split points.
 */
155 enum clo_given_counter {
156 CLOGC_DEFAULT_CGROUP_DAC,
158 CLOGC_DEFAULT_IO_MAX,
163 static unsigned clo_given_counter[NUM_CLOGCS];
/*
 * Convert arg with parse_cgroup_acl() and append the result to a
 * NULL-terminated string list.
 *
 * listp points to the list, count to its number of entries. The indexing
 * below implies that *count has already been incremented when the new entry
 * is stored (the increment itself is not shown).
 */
165 static void append_dac_entry(const char *arg, char ***listp, unsigned *count)
167 char *val = parse_cgroup_acl(arg);
169 *listp = xrealloc(*listp, (*count + 1) * sizeof(char *));
170 (*listp)[*count - 1] = val;
171 (*listp)[*count] = NULL;
/*
 * Append a copy of arg to a NULL-terminated string list.
 *
 * listp points to the list, count to its number of entries. As in
 * append_dac_entry(), the new entry goes to index *count - 1 and the slot
 * behind it is set to NULL.
 */
174 static void append_io_max_entry(const char *arg, char ***listp, unsigned *count)
177 *listp = xrealloc(*listp, (*count + 1) * sizeof(char *));
178 (*listp)[*count - 1] = xstrdup(arg);
179 (*listp)[*count] = NULL;
/*
 * Validate the option values and build the container array and the global
 * default settings from them.
 *
 * Compound arguments are split by parse_compound_arg() into a container name
 * and a value. Containers are created on first reference by
 * get_or_append_container(). Invalid values terminate the program through
 * die(), die_errno() or the range checks.
 */
182 static void check_options(void)
/* Start with an empty list which consists of the terminating NULL only. */
190 container = xzmalloc(sizeof(struct container *));
191 /* loop backwards to let command line opts override config file opts */
192 for (n = OPT_GIVEN(MICOFORIA, CONTAINER) - 1; n != ~0U; n--) {
193 arg = OPT_STRING_VAL_N(n, MICOFORIA, CONTAINER);
195 get_or_append_container(arg);
197 for (n = OPT_GIVEN(MICOFORIA, PRE_START_HOOK) - 1; n != ~0U; n--) {
198 arg = OPT_STRING_VAL_N(n, MICOFORIA, PRE_START_HOOK);
199 parse_compound_arg(arg, "pre-start-hook", &name, &val);
200 c = get_or_append_container(name);
202 free(c->pre_start_hook);
203 c->pre_start_hook = val;
205 for (n = OPT_GIVEN(MICOFORIA, PRE_EXEC_HOOK) - 1; n != ~0U; n--) {
206 arg = OPT_STRING_VAL_N(n, MICOFORIA, PRE_EXEC_HOOK);
207 parse_compound_arg(arg, "pre-exec-hook", &name, &val);
208 c = get_or_append_container(name);
210 free(c->pre_exec_hook);
211 c->pre_exec_hook = val;
/* Per-container capabilities to drop, one array element per option. */
213 for (n = OPT_GIVEN(MICOFORIA, CAPDROP) - 1; n != ~0U; n--) {
215 arg = OPT_STRING_VAL_N(n, MICOFORIA, CAPDROP);
216 parse_compound_arg(arg, "capabilities", &name, &val);
217 c = get_or_append_container(name);
218 if (cap_from_name(val, &cap_val) < 0)
219 die_errno("%s: invalid capability: %s", name, val);
220 c->capdrop = xrealloc(c->capdrop,
221 ++c->num_capdrops * sizeof(cap_value_t));
222 c->capdrop[c->num_capdrops - 1] = cap_val;
/* Default capabilities are plain names, not compound arguments. */
226 for (n = 0; n < OPT_GIVEN(MICOFORIA, DEFAULT_CAPDROP); n++) {
228 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CAPDROP);
229 if (cap_from_name(arg, &cap_val) < 0)
/*
 * Report arg, the string which failed to parse. val is not assigned in this
 * loop, it is either stale from an earlier loop or was never set.
 */
230 die_errno("invalid default capability: %s", arg);
231 default_capdrop = xrealloc(default_capdrop,
232 ++num_default_capdrops * sizeof(cap_value_t));
233 default_capdrop[num_default_capdrops - 1] = cap_val;
/* Per-container tty minor numbers to forward. */
235 for (n = OPT_GIVEN(MICOFORIA, TTY) - 1; n != ~0U; n--) {
237 arg = OPT_STRING_VAL_N(n, MICOFORIA, TTY);
238 parse_compound_arg(arg, "tty", &name, &val);
239 c = get_or_append_container(name);
240 minor = atou32(val, "tty");
242 die("can not capture tty0");
243 c->tty = xrealloc(c->tty, ++c->num_ttys * sizeof(uint32_t));
244 c->tty[c->num_ttys - 1] = minor;
248 for (n = 0; n < OPT_GIVEN(MICOFORIA, DEFAULT_TTY); n++) {
249 uint32_t minor = OPT_UINT32_VAL_N(n, MICOFORIA, DEFAULT_TTY);
251 die("can not capture tty0");
252 default_tty = xrealloc(default_tty,
253 ++num_default_ttys * sizeof(uint32_t));
254 default_tty[num_default_ttys - 1] = minor;
257 for (n = OPT_GIVEN(MICOFORIA, ROOT_DIRECTORY) - 1; n != ~0U ; n--) {
258 arg = OPT_STRING_VAL_N(n, MICOFORIA, ROOT_DIRECTORY);
259 parse_compound_arg(arg, "root-directory", &name, &val);
260 c = get_or_append_container(name);
/* CPU core limits: the default first, then the per-container values. */
265 u32 = OPT_UINT32_VAL(MICOFORIA, DEFAULT_CPU_CORES);
266 check_range(u32, 0, 65536, "default-cpu-cores");
267 for (n = OPT_GIVEN(MICOFORIA, CPU_CORES) - 1; n != ~0U ; n--) {
268 arg = OPT_STRING_VAL_N(n, MICOFORIA, CPU_CORES);
269 parse_compound_arg(arg, "cpu-cores", &name, &val);
270 c = get_or_append_container(name);
272 u32 = atou32(val, "cpu-cores");
274 check_range(u32, 0, 65536, "cpu-cores");
/* Memory limits: the default first, then the per-container values. */
277 u32 = OPT_UINT32_VAL(MICOFORIA, DEFAULT_MEMORY_LIMIT);
278 check_range(u32, 0, 1024 * 1024, "default-memory-limit");
279 for (n = OPT_GIVEN(MICOFORIA, MEMORY_LIMIT) - 1; n != ~0U ; n--) {
280 arg = OPT_STRING_VAL_N(n, MICOFORIA, MEMORY_LIMIT);
281 parse_compound_arg(arg, "memory-limit", &name, &val);
282 c = get_or_append_container(name);
284 u32 = atou32(val, "memory-limit");
286 check_range(u32, 0, 1024 * 1024, "memory-limit");
287 c->memory_limit = u32;
289 for (n = OPT_GIVEN(MICOFORIA, INIT) - 1; n != ~0U ; n--) {
290 arg = OPT_STRING_VAL_N(n, MICOFORIA, INIT);
291 parse_compound_arg(arg, "init", &name, &val);
292 c = get_or_append_container(name);
/* Network interfaces accumulate: each option adds one ifspec. */
297 for (n = 0; n < OPT_GIVEN(MICOFORIA, NET); n++) {
298 struct ifspec *ifspec;
299 arg = OPT_STRING_VAL_N(n, MICOFORIA, NET);
300 parse_compound_arg(arg, "net", &name, &val);
301 c = get_or_append_container(name);
303 c->ifspec = xrealloc(c->ifspec,
304 ++c->num_ifspecs * sizeof(struct ifspec));
305 ifspec = c->ifspec + c->num_ifspecs - 1;
306 parse_ifspec(val, &ifspec->bridge, ifspec->hwaddr);
/*
 * The list-valued options below are processed in two passes which are split
 * at the count recorded in clo_given_counter[]: first the values with index m
 * and above, then the values below m.
 */
310 m = clo_given_counter[CLOGC_DEFAULT_CGROUP_DAC];
311 for (n = m; n < OPT_GIVEN(MICOFORIA, DEFAULT_CGROUP_DAC); n++) {
312 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CGROUP_DAC);
313 append_dac_entry(arg, &default_dacl, &num_default_dac_entries);
315 for (n = 0; n < m; n++) {
316 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CGROUP_DAC);
317 append_dac_entry(arg, &default_dacl, &num_default_dac_entries);
319 m = clo_given_counter[CLOGC_CGROUP_DAC];
320 for (n = m; n < OPT_GIVEN(MICOFORIA, CGROUP_DAC); n++) {
321 arg = OPT_STRING_VAL_N(n, MICOFORIA, CGROUP_DAC);
322 parse_compound_arg(arg, "cgroup-dac", &name, &val);
323 c = get_or_append_container(name);
325 append_dac_entry(val, &c->dacl, &c->num_dac_entries);
328 for (n = 0; n < m; n++) {
329 arg = OPT_STRING_VAL_N(n, MICOFORIA, CGROUP_DAC);
330 parse_compound_arg(arg, "cgroup-dac", &name, &val);
331 c = get_or_append_container(name);
333 append_dac_entry(val, &c->dacl, &c->num_dac_entries);
337 m = clo_given_counter[CLOGC_DEFAULT_IO_MAX];
338 for (n = m; n < OPT_GIVEN(MICOFORIA, DEFAULT_IO_MAX); n++) {
339 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_IO_MAX);
340 append_io_max_entry(arg, &default_io_max, &num_default_io_max_entries);
342 for (n = 0; n < m; n++) {
343 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_IO_MAX);
344 append_io_max_entry(arg, &default_io_max, &num_default_io_max_entries);
346 m = clo_given_counter[CLOGC_IO_MAX];
347 for (n = m; n < OPT_GIVEN(MICOFORIA, IO_MAX); n++) {
348 arg = OPT_STRING_VAL_N(n, MICOFORIA, IO_MAX);
349 parse_compound_arg(arg, "io-max", &name, &val);
350 c = get_or_append_container(name);
352 append_io_max_entry(val, &c->io_max, &c->num_io_max_entries);
355 for (n = 0; n < m; n++) {
356 arg = OPT_STRING_VAL_N(n, MICOFORIA, IO_MAX);
357 parse_compound_arg(arg, "io-max", &name, &val);
358 c = get_or_append_container(name);
360 append_io_max_entry(val, &c->io_max, &c->num_io_max_entries);
364 /* init default c->ifspec[] */
365 FOR_EACH_CONTAINER(c) {
366 if (c->num_ifspecs == 0) {
367 const char *br = OPT_STRING_VAL(MICOFORIA, DEFAULT_BRIDGE);
369 c->ifspec = xmalloc(sizeof(struct ifspec));
370 c->ifspec[0].bridge = xstrdup(br);
/* An all-zero hardware address is stored for the default interface. */
371 memset(c->ifspec[0].hwaddr, 0, 6);
/*
 * Print the list of available subcommands to stdout.
 *
 * Two formats are implemented: one line per subcommand with name and purpose,
 * and a list of the bare names.
 * NOTE(review): the branch which selects the format is not shown; presumably
 * verbose selects the name/purpose form -- confirm.
 */
377 static void show_subcommand_summary(bool verbose)
/* Stringify the subcommand names to build a NULL-terminated name table. */
381 #define LSG_MICOFORIA_CMD(_name) #_name
382 static const char * const subcommand_names[] = {LSG_MICOFORIA_SUBCOMMANDS NULL};
383 #undef LSG_MICOFORIA_CMD
384 printf("Available subcommands:\n");
386 const struct lls_command *cmd;
/* The loop starts at command number one and stops when lls_cmd() returns NULL. */
387 for (i = 1; (cmd = lls_cmd(i, micoforia_suite)); i++) {
388 const char *purpose = lls_purpose(cmd);
389 const char *name = lls_command_name(cmd);
390 printf("%-12s%s\n", name, purpose);
395 for (i = 0; i < LSG_NUM_MICOFORIA_SUBCOMMANDS; i++) {
/* n accumulates the number of characters printed. */
402 n += printf("%s", subcommand_names[i]);
/* Declared here, defined outside of the code shown. */
408 const char *GET_VERSION(void);
/*
 * Deal with the version option, the two help options, and the case that no
 * non-option argument was given.
 *
 * The version option prints package, license and contact information. The
 * help options print the long or the short help text of the supercommand.
 * Without any inputs the verbose subcommand summary is shown.
 */
409 static void handle_version_and_help(void)
413 if (OPT_GIVEN(MICOFORIA, VERSION)) {
414 printf(PACKAGE " %s\n"
415 "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
416 "License: " LICENSE " <" LICENSE_URL ">.\n"
417 "This is free software: you are free to change and redistribute it.\n"
418 "There is NO WARRANTY, to the extent permitted by law.\n"
420 "Web page: " URL "\n"
421 "Clone URL: " CLONE_URL "\n"
422 "Gitweb: " GITWEB_URL "\n"
423 "Author's Home Page: " HOME_URL "\n"
424 "Send feedback to: " AUTHOR " <" EMAIL ">\n"
/* The detailed help takes precedence over the short help. */
430 if (OPT_GIVEN(MICOFORIA, DETAILED_HELP))
431 help = lls_long_help(CMD_PTR(MICOFORIA));
432 else if (OPT_GIVEN(MICOFORIA, HELP))
433 help = lls_short_help(CMD_PTR(MICOFORIA));
434 else if (lls_num_inputs(lpr) == 0) {
435 show_subcommand_summary(true /* verbose */);
439 printf("%s\n", help);
/*
 * Return the path of the config file as a newly allocated string.
 *
 * If the config file option was given, a copy of its argument is returned.
 * Otherwise the path is ".micoforiarc" in the home directory found in the
 * password database entry of the calling user, or in /root if there is no
 * such entry.
 */
444 static char *get_config_file_path(void)
449 if (OPT_GIVEN(MICOFORIA, CONFIG_FILE))
450 return xstrdup(OPT_STRING_VAL(MICOFORIA, CONFIG_FILE));
451 pw = getpwuid(getuid());
452 home = pw? pw->pw_dir : "/root";
453 return msg("%s/.micoforiarc", home);
/*
 * Parse the command line and merge in the options of the config file.
 *
 * argc, argv: the arguments to parse.
 * cmd: the lopsub command the arguments belong to.
 * lprp: receives the parse result of the command line. The merged result is
 * computed into merged_lpr; the lines which store it are not shown.
 *
 * Errors of the lopsub library are reported through die_lopsub(), failing
 * system calls through die_errno().
 */
456 static void parse_options(int argc, char **argv, const struct lls_command *cmd,
457 struct lls_parse_result **lprp)
465 char **cf_argv, *errctx = NULL;
466 const char *subcmd_name;
467 struct lls_parse_result *merged_lpr, *cf_lpr;
469 ret = lls_parse(argc, argv, cmd, lprp, &errctx);
471 die_lopsub(ret, &errctx);
472 handle_version_and_help();
473 clo_given_counter[CLOGC_DEFAULT_CGROUP_DAC] = OPT_GIVEN(MICOFORIA,
475 clo_given_counter[CLOGC_CGROUP_DAC] = OPT_GIVEN(MICOFORIA, CGROUP_DAC);
476 clo_given_counter[CLOGC_DEFAULT_IO_MAX] =
477 OPT_GIVEN(MICOFORIA, DEFAULT_IO_MAX);
478 clo_given_counter[CLOGC_IO_MAX] = OPT_GIVEN(MICOFORIA, IO_MAX);
479 config_file = get_config_file_path();
480 ret = open(config_file, O_RDONLY);
/*
 * A missing config file is an error only if the path was specified
 * explicitly.
 */
482 if (errno != ENOENT || OPT_GIVEN(MICOFORIA, CONFIG_FILE))
483 die_errno("can not open config file %s", config_file);
484 /* no config file -- nothing to do */
489 ret = fstat(fd, &statbuf);
491 die_errno("failed to stat config file %s", config_file);
492 sz = statbuf.st_size;
493 if (sz == 0) { /* config file is empty -- nothing to do */
/* Map the file read-only and let lopsub turn it into an argument vector. */
497 map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
498 if (map == MAP_FAILED)
499 die_errno("failed to mmap config file %s", config_file);
/* No subcommand name is passed for the supercommand itself. */
500 subcmd_name = (cmd == CMD_PTR(MICOFORIA))? NULL : lls_command_name(cmd);
501 ret = lls_convert_config(map, sz, subcmd_name, &cf_argv,
505 ERROR_LOG("failed to convert config file %s\n", config_file);
506 die_lopsub(ret, &errctx);
509 ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
510 lls_free_argv(cf_argv);
512 die_lopsub(ret, &errctx);
513 /* command line options override config file options */
514 ret = lls_merge(*lprp, cf_lpr, cmd, &merged_lpr, &errctx);
516 die_lopsub(ret, &errctx);
/* Both inputs of the merge are released here. */
517 lls_free_parse_result(cf_lpr, cmd);
518 lls_free_parse_result(*lprp, cmd);
/*
 * Return the pre-start hook of the container, or the value of the default
 * pre-start hook option if the container does not define one. The returned
 * string is not a copy.
 */
526 static const char *get_pre_start_hook(const struct container *c)
528 if (c->pre_start_hook)
529 return c->pre_start_hook;
530 return OPT_STRING_VAL(MICOFORIA, DEFAULT_PRE_START_HOOK);
/*
 * Return the pre-exec hook of the container, or the value of the default
 * pre-exec hook option if the container does not define one. The returned
 * string is not a copy.
 */
533 static const char *get_pre_exec_hook(const struct container *c)
535 if (c->pre_exec_hook)
536 return c->pre_exec_hook;
537 return OPT_STRING_VAL(MICOFORIA, DEFAULT_PRE_EXEC_HOOK);
/*
 * Return the root directory of the container as a newly allocated string.
 *
 * The result is either a copy of c->root_dir or the default root prefix
 * followed by a slash and the container name. The condition which selects
 * between the two is not shown (presumably whether c->root_dir is set).
 */
540 static char *get_root_dir(const struct container *c)
543 return xstrdup(c->root_dir);
544 return msg("%s/%s", OPT_STRING_VAL(MICOFORIA, DEFAULT_ROOT_PREFIX), c->name);
/*
 * Build a string which describes all network interfaces of the container.
 *
 * Each interface spec contributes one formatted item which ends with the six
 * bytes of the hardware address as colon-separated two-digit hex numbers.
 * The container must have at least one interface spec.
 */
547 static char *get_ifspec_string(const struct container *c)
552 assert(c->num_ifspecs > 0);
553 for (n = 0; n < c->num_ifspecs; n++) {
554 uint8_t *x = c->ifspec[n].hwaddr;
555 char *tmp = msg("%s%s%s:%02x:%02x:%02x:%02x:%02x:%02x",
559 x[0], x[1], x[2], x[3], x[4], x[5]
/*
 * Return the name of a network interface as a newly allocated string.
 *
 * c: the container, idx: index into c->ifspec[], peer: selects the variant of
 * the name with the "-g" suffix.
 *
 * With a single interface spec the name is the container name. Otherwise the
 * name of the bridge is appended to make the names unique.
 */
567 static char *interface_name(const struct container *c, unsigned idx, bool peer)
569 assert(idx < c->num_ifspecs);
570 if (c->num_ifspecs == 1)
571 return peer? msg("%s-g", c->name) : xstrdup(c->name);
573 return msg("%s-%s-g", c->name, c->ifspec[idx].bridge);
574 return msg("%s-%s", c->name, c->ifspec[idx].bridge);
/*
 * Export the root directory of the container in the MICOFORIA_ROOT_DIR
 * environment variable, overwriting any previous value.
 */
577 static void set_m7a_root_dir_env(const struct container *c)
579 char *root = get_root_dir(c);
580 DEBUG_LOG("root dir: %s\n", root);
581 setenv("MICOFORIA_ROOT_DIR", root, 1);
/*
 * Run the pre-start hook of the container through "/bin/sh -c".
 *
 * The hook sees the environment variables MICOFORIA_CONTAINER_NAME,
 * MICOFORIA_ROOT_DIR and MICOFORIA_IFSPECS. All three are removed from the
 * environment again after the hook has run. A failing hook is logged as an
 * error; presumably the result of xexec() is returned -- the return statement
 * is not shown.
 */
585 static bool run_pre_start_hook(const struct container *c)
588 char *cmd = xstrdup(get_pre_start_hook(c));
589 char *argv[] = {"/bin/sh", "-c", cmd, NULL};
592 setenv("MICOFORIA_CONTAINER_NAME", c->name, 1);
593 set_m7a_root_dir_env(c);
595 ifspec = get_ifspec_string(c);
596 DEBUG_LOG("ifspecs: %s\n", ifspec);
597 setenv("MICOFORIA_IFSPECS", ifspec, 1);
600 INFO_LOG("running pre-start hook %s\n", cmd);
601 success = xexec(argv, NULL);
604 ERROR_LOG("pre-start hook failed\n");
605 unsetenv("MICOFORIA_CONTAINER_NAME");
606 unsetenv("MICOFORIA_IFSPECS");
607 unsetenv("MICOFORIA_ROOT_DIR");
/*
 * Run the pre-exec hook of the container through "/bin/sh -c".
 *
 * MICOFORIA_ROOT_DIR is set while the hook runs and is removed afterwards.
 * Unlike the pre-start hook, a failing pre-exec hook is fatal.
 */
611 static void run_pre_exec_hook(const struct container *c)
613 char *cmd = xstrdup(get_pre_exec_hook(c));
614 char *argv[] = {"/bin/sh", "-c", cmd, NULL};
616 INFO_LOG("/bin/sh -c '%s'\n", cmd);
617 set_m7a_root_dir_env(c);
618 if (!xexec(argv, NULL))
619 die("%s: pre-exec hook failed", c->name);
621 unsetenv("MICOFORIA_ROOT_DIR");
/*
 * Write the string txt to the cgroup control file at path.
 *
 * The file is opened write-only. If the file can not be opened or the number
 * of bytes written differs from sz, the program is terminated via
 * die_errno().
 */
624 static void write_cgroup(const char *path, const char *txt)
629 if ((fd = open(path, O_WRONLY)) < 0)
630 die_errno("open %s", path);
632 if (write(fd, txt, sz) != sz)
633 die_errno("could not write to %s", path);
/*
 * Determine the device access control list for a container.
 *
 * The entries of the container are checked first, then the global default
 * entries. The built-in list dflt below is the last resort. The list is
 * returned via result, the return value is the number of entries.
 *
 * apply_dacl() interprets the first character of each entry: 'a' selects the
 * devices.allow file, anything else devices.deny. The remainder of the entry
 * is written to that file.
 */
637 static unsigned get_dacl(const struct container *c, char ***result)
639 static char *dflt[] = {
640 "da", /* deny access to all devices except the ones below */
641 "ac 1:3 rwm", /* null */
642 "ac 1:5 rwm", /* zero */
643 "ac 1:7 rwm", /* full */
644 "ac 1:8 rwm", /* random */
645 "ac 1:9 rwm", /* urandom */
646 "ac 4:* rwm", /* tty?* */
647 "ac 5:0 rwm", /* tty */
648 "ac 5:2 rwm", /* ptmx */
649 "ac 136:* rwm", /* pts */
651 if (c->num_dac_entries > 0) {
653 return c->num_dac_entries;
655 if (num_default_dac_entries > 0) {
656 *result = default_dacl;
657 return num_default_dac_entries;
660 return ARRAY_SIZE(dflt);
/*
 * Set up the device access control list of the container.
 *
 * The function creates /var/cgroup/micoforia/<name> if necessary, writes all
 * entries returned by get_dacl() to the devices.allow and devices.deny files
 * of that directory and finally writes the pid of the calling process to the
 * cgroup.procs file. All errors are fatal.
 */
663 static void apply_dacl(const struct container *c)
666 unsigned n, num_entries;
667 char *m7a_dir, *container_dir, *allow, *deny, *procs, *txt;
668 int fd, allow_fd, deny_fd;
671 m7a_dir = msg("/var/cgroup/micoforia");
672 container_dir = msg("%s/%s", m7a_dir, c->name);
673 allow = msg("%s/devices.allow", container_dir);
674 deny = msg("%s/devices.deny", container_dir);
675 procs = msg("%s/cgroup.procs", container_dir);
/* Already existing directories are not an error. */
677 if (mkdir(m7a_dir, 0777) < 0 && errno != EEXIST)
678 die_errno("mkdir %s", m7a_dir);
680 if (mkdir(container_dir, 0777) < 0 && errno != EEXIST)
681 die_errno("mkdir %s", container_dir);
683 if ((allow_fd = open(allow, O_WRONLY)) < 0)
684 die_errno("open %s", allow);
686 if ((deny_fd = open(deny, O_WRONLY)) < 0)
687 die_errno("open %s", deny);
690 num_entries = get_dacl(c, &dacl);
691 INFO_LOG("applying %u entr%s\n", num_entries, num_entries == 1?
693 for (n = 0; n < num_entries; n++) {
694 char *entry = dacl[n];
695 DEBUG_LOG("dac entry #%u: %s %s\n", n, dacl[n][0] == 'a'?
696 "allow" : "deny", dacl[n] + 1);
/* The first character only selects the file, it is not written. */
697 txt = msg("%s\n", entry + 1);
699 fd = entry[0] == 'a'? allow_fd : deny_fd;
700 if (write(fd, txt, sz) != sz)
701 die_errno("could not write to cgroup devices.%s file",
702 entry[0] == 'a'? "allow" : "deny");
/* Move the calling process into the cgroup of the container. */
707 txt = msg("%u\n", (unsigned)getpid());
708 write_cgroup(procs, txt);
/*
 * Check that both cgroup hierarchies are available and prepare the version 2
 * hierarchy.
 *
 * The cpu, memory and io controllers are enabled in the subtree control files
 * of /var/cgroup2 and of /var/cgroup2/micoforia, which is created if it does
 * not exist. All errors are fatal.
 */
712 static void cgroup_init(void)
714 const char controllers[] = "+cpu +memory +io\n";
717 if (access("/var/cgroup/cgroup.clone_children", F_OK) < 0)
718 die("cgroup v1 not mounted at /var/cgroup/");
719 if (access("/var/cgroup2/cgroup.subtree_control", F_OK) < 0)
/* This check concerns the v2 hierarchy, so report that one. */
720 die("cgroup v2 not mounted at /var/cgroup2/");
721 write_cgroup("/var/cgroup2/cgroup.subtree_control", controllers);
722 m7a_dir = msg("/var/cgroup2/micoforia");
723 if (mkdir(m7a_dir, 0777) < 0 && errno != EEXIST)
724 die_errno("mkdir %s", m7a_dir);
725 ctl = msg("%s/cgroup.subtree_control", m7a_dir);
727 write_cgroup(ctl, controllers);
/*
 * Create the directory /var/cgroup2/micoforia/<name> if it does not exist and
 * write the pid of the calling process to its cgroup.procs file.
 */
731 static void create_cgroup_v2(const struct container *c)
734 char *ctl, *dir = msg("/var/cgroup2/micoforia/%s", c->name);
736 if (mkdir(dir, 0777) < 0 && errno != EEXIST)
737 die_errno("mkdir %s", dir);
738 ctl = msg("%s/cgroup.procs", dir);
740 sprintf(buf, "%u\n", (unsigned)getpid());
741 write_cgroup(ctl, buf);
/*
 * Return the CPU core limit of the container. The value ~0U in c->cpu_cores
 * selects the value of the default-cpu-cores option instead.
 */
745 static unsigned get_cpu_cores(const struct container *c)
747 return c->cpu_cores != ~0U? c->cpu_cores :
748 OPT_UINT32_VAL(MICOFORIA, DEFAULT_CPU_CORES);
/*
 * Limit the CPU time of the container through the cpu.max file of its
 * version 2 cgroup.
 *
 * The quota written is one million per core, the period is one million. A
 * core count of zero means unlimited.
 */
751 static void apply_cpu_limit(const struct container *c)
754 unsigned cores = get_cpu_cores(c);
756 if (cores == 0) /* unlimited */
758 assert(cores != ~0U);
759 INFO_LOG("%u core%s\n", cores, cores == 1? "" : "s");
760 ctl = msg("/var/cgroup2/micoforia/%s/cpu.max", c->name);
/*
 * Compute the quota in 64 bit arithmetic. The option parser accepts up to
 * 65536 cores and the product wraps around in unsigned arithmetic for more
 * than 4294 cores.
 */
761 str = msg("%llu 1000000\n", 1000000LLU * cores);
762 write_cgroup(ctl, str);
/*
 * Return the memory limit of the container. The value ~0U in c->memory_limit
 * selects the value of the default-memory-limit option instead.
 */
767 static unsigned get_memory_limit(const struct container *c)
769 return c->memory_limit != ~0U? c->memory_limit :
770 OPT_UINT32_VAL(MICOFORIA, DEFAULT_MEMORY_LIMIT);
/*
 * Limit the memory of the container through the memory.high file of its
 * version 2 cgroup.
 *
 * The limit is given in gigabytes and is converted to bytes in 64 bit
 * arithmetic. A limit of zero means unlimited.
 */
773 static void apply_memory_limit(const struct container *c)
776 unsigned gigs = get_memory_limit(c);
778 if (gigs == 0) /* unlimited */
781 INFO_LOG("%uG\n", gigs);
782 ctl = msg("/var/cgroup2/micoforia/%s/memory.high", c->name);
783 str = msg("%llu\n", 1024LLU * 1024LLU * 1024LLU * gigs);
784 write_cgroup(ctl, str);
/*
 * Determine the io.max entries for a container.
 *
 * The entries of the container take precedence over the global default
 * entries. The list is returned via result, the return value is the number
 * of entries. The case that neither list has entries is not shown;
 * apply_io_limit() handles a return value of zero.
 */
789 static unsigned get_iospecs(const struct container *c, char ***result)
791 if (c->num_io_max_entries > 0) {
793 return c->num_io_max_entries;
795 if (num_default_io_max_entries > 0) {
796 *result = default_io_max;
797 return num_default_io_max_entries;
/*
 * Write all I/O limits of the container to the io.max file of its version 2
 * cgroup. Each entry is written with a separate call to write_cgroup().
 */
803 static void apply_io_limit(const struct container *c)
805 unsigned n, num_entries;
809 num_entries = get_iospecs(c, &iospec);
810 if (num_entries == 0)
812 INFO_LOG("%u entries\n", num_entries);
813 io_max = msg("/var/cgroup2/micoforia/%s/io.max", c->name);
814 for (n = 0; n < num_entries; n++)
815 write_cgroup(io_max, iospec[n]);
/*
 * Call remove_subdirs_recursively() on the cgroup directories of the
 * container in both hierarchies (/var/cgroup and /var/cgroup2).
 */
819 static void cgroup_cleanup(const struct container *c)
821 char *dir = msg("/var/cgroup/micoforia/%s", c->name);
822 remove_subdirs_recursively(dir);
824 dir = msg("/var/cgroup2/micoforia/%s", c->name);
825 remove_subdirs_recursively(dir);
/*
 * Create the network devices of the container.
 *
 * For each interface spec a possibly existing device of the same name is
 * deleted, a pair of veth devices is created, the hardware address of the
 * peer device is set and the other device is attached to the bridge.
 * NOTE(review): the failure paths and the return statements are not shown;
 * presumably false is returned if one of the steps fails -- confirm.
 */
829 static bool setup_network(const struct container *c)
835 WARNING_LOG("could not set establish loopback link\n");
836 for (n = 0; n < c->num_ifspecs; n++) {
837 iface = interface_name(c, n, false);
838 peer = interface_name(c, n, true);
839 link_del(iface); /* ignore errors */
840 if (!create_veth_device_pair(iface, peer))
842 if (!set_hwaddr(peer, c->ifspec[n].hwaddr))
844 if (!attach_to_bridge(iface, c->ifspec[n].bridge))
/*
 * Adjust the terminal settings of the given file descriptor.
 *
 * Echoing, signal generation and canonical mode are switched off. If the
 * window size of stdin can be determined, it is copied to fd. Errors are
 * logged but are not fatal.
 */
858 static void setup_termios(int fd)
860 struct winsize wsz; /* see ioctl_tty(2) */
865 if (tcgetattr(fd, &tios)) {
866 ERROR_LOG("tcgetattr: %m\n");
869 tios.c_lflag &= ~(ECHO | ISIG | ICANON);
871 tios.c_cc[VTIME] = 0;
872 if (tcsetattr(fd, TCSAFLUSH, &tios) < 0)
873 ERROR_LOG("tcsetattr: %m\n");
874 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &wsz) >= 0)
875 ioctl(fd, TIOCSWINSZ, &wsz);
/* Describes one character device node, see create_standard_device_nodes(). */
878 struct device_node_info {
/* The device numbers which are combined with makedev(). */
879 unsigned major, minor;
/*
 * Create the character device nodes of the table below in the dev directory
 * of the container.
 *
 * A failing mknod() is fatal. The permissions are set with a separate
 * chmod() call whose return value is not checked.
 */
884 static void create_standard_device_nodes(struct container_runtime *cr)
886 const struct device_node_info devices[] = {
887 {.major = 1, .minor = 3, .mode = 0666, .name = "null"},
888 {.major = 1, .minor = 5, .mode = 0666, .name = "zero"},
889 {.major = 1, .minor = 7, .mode = 0666, .name = "full"},
890 {.major = 1, .minor = 8, .mode = 0666, .name = "random"},
891 {.major = 1, .minor = 9, .mode = 0666, .name = "urandom"},
892 {.major = 4, .minor = 0, .mode = 0620, .name = "tty0"},
893 {.major = 5, .minor = 0, .mode = 0666, .name = "tty"},
894 {.major = 5, .minor = 1, .mode = 0600, .name = "console"},
895 {.major = 5, .minor = 2, .mode = 0666, .name = "ptmx"},
899 for (n = 0; n < ARRAY_SIZE(devices); n++) {
900 const struct device_node_info *d = devices + n;
901 char *path = msg("%s/%s", cr->dev, d->name);
/* Only the file type is passed to mknod(), the mode is set below. */
902 if (mknod(path, S_IFCHR, makedev(d->major, d->minor)) < 0)
903 die_errno("mknod %s", d->name);
904 chmod(path, d->mode);
/*
 * Populate the dev directory of the container and connect the forwarded ttys.
 *
 * A tmpfs is mounted at cr->dev and the standard device nodes are created.
 * For each forwarded tty a device node named after its minor number is
 * created and the slave side of the corresponding pty is bind mounted on top
 * of it. The slave of the first pty is also bind mounted on the console
 * device. All errors are fatal.
 */
909 static void init_console(struct container_runtime *cr)
914 if (mount(NULL, cr->dev, "tmpfs", 0, "size=500000,mode=755") < 0)
915 die("mount tmpfs at %s: %m", cr->dev);
916 create_standard_device_nodes(cr);
917 for (n = 0; n < cr->num_ttys; n++) {
918 char *tty = msg("%s/tty%u", cr->dev, cr->tty[n]);
920 if (mknod(tty, S_IFCHR, makedev(4, cr->tty[n])) < 0)
921 die("mknod %s: %m", tty);
923 setup_termios(cr->slave[n]);
924 INFO_LOG("bind mounting %s -> %s\n", ttyname(cr->slave[n]), tty);
925 if (mount(ttyname(cr->slave[n]), tty, "none",
926 MS_BIND | MS_PRIVATE, NULL) < 0)
927 die("failed to bind mount %s: %m\n", tty);
930 console = msg("%s/console", cr->dev);
931 if (mount(ttyname(cr->slave[0]), console, "none",
932 MS_BIND | MS_PRIVATE, NULL) < 0)
933 die("failed to bind mount %s: %m\n", console);
938 * These umounts fail if the container shutdown already umounted the bind
939 * mounted devices. This is not fatal, so log only with low severity.
/*
 * Detach the bind mounts which were established by init_console(): one per
 * forwarded tty, plus the console device. Failures are only logged.
 */
941 static void shutdown_console(struct container_runtime *cr)
946 for (n = 0; n < cr->num_ttys; n++) {
/*
 * The device nodes are named after the minor numbers of the ttys, not after
 * the loop index. Use the same path as init_console().
 */
947 char *tty = msg("%s/tty%u", cr->dev, cr->tty[n]);
948 if (umount2(tty, MNT_DETACH) < 0)
949 DEBUG_LOG("umount %s: %m\n", tty);
952 console = msg("%s/console", cr->dev);
953 if (umount2(console, MNT_DETACH) < 0)
954 DEBUG_LOG("umount %s: %m\n", console);
/*
 * Return the socket path "micoforia/<container_name>" as a newly allocated
 * string.
 */
958 static char *get_socket_path(const char *container_name)
960 return msg("micoforia/%s", container_name);
963 /* Ignore everything the client sends us, but invalidate the fd on EOF. */
/*
 * client points to the file descriptor of an attached client. A read which
 * returns zero or fails is treated as the client going away.
 */
964 static void dispatch_client(int *client)
967 if (read(*client, buf, sizeof(buf)) <= 0) {
968 NOTICE_LOG("detaching client on fd %d\n", *client);
/*
 * Receive and process one request from the unix socket of the container.
 *
 * Requests from other users are rejected with EACCES. Three requests are
 * understood: "init_pid" is answered with the pid of the init process,
 * "attach <minor>" and "force-attach <minor>" ask for the master side of the
 * pty which is forwarded to the given tty. Anything else is answered with
 * EINVAL. Attach requests fail with ENOTTY if the tty is not forwarded. If a
 * client is already attached, either that client is closed or the request
 * fails with EBUSY. On success the master fd is passed to the client and the
 * connection is remembered in cr->client[].
 */
974 static void dispatch_socket_request(struct container_runtime *cr)
/* Receive at most sizeof(buf) - 1 bytes so that buf stays zero-terminated. */
983 memset(buf, 0, sizeof(buf));
984 if (!recv_cred_buffer(cr->socket_fd, buf, sizeof(buf) - 1, &cfd, &uid))
986 if (uid != getuid()) {
987 const char msg[] = "\1EACCES";
988 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
989 NOTICE_LOG("access denied for uid %d\n", (int)uid);
992 if (strcmp(buf, "init_pid") == 0) {
/* The reply consists of one byte followed by the raw bytes of the pid. */
994 memcpy(buf + 1, &cr->init_pid, sizeof(int));
995 send(cfd, buf, 1 + sizeof(int), MSG_DONTWAIT);
998 if (sscanf(buf, "attach %u", &minor) == 1) {
1000 } else if (sscanf(buf, "force-attach %u", &minor) == 1) {
1003 const char msg[] = "\1EINVAL";
1004 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1005 NOTICE_LOG("invalid request: %s\n", buf);
/* Find the index of the requested minor number in the list of forwarded ttys. */
1008 for (n = 0; n < cr->num_ttys; n++) {
1009 INFO_LOG("n: %u, tty[n]: %u\n", n, cr->tty[n]);
1010 if (cr->tty[n] == minor)
1013 if (n == cr->num_ttys) {
1014 const char msg[] = "\1ENOTTY";
1015 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1016 NOTICE_LOG("tty%u is not being forwarded\n", minor);
/* A non-negative entry means that a client is attached to this tty. */
1019 if (cr->client[n] >= 0) {
1021 close(cr->client[n]);
1024 const char msg[] = "\1EBUSY";
1025 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1026 ERROR_LOG("tty%u is already in use\n", minor);
1030 if (!pass_fd(cr->master[n], cfd)) {
1031 ERROR_LOG("could not pass master fd\n");
1034 NOTICE_LOG("attached client on fd %d to tty%u\n", cfd, minor);
1035 cr->client[n] = cfd;
1041 /* discards read data if dst < 0 */
/*
 * Read once from src and write the data to dst with a single write() call.
 * Read errors, write errors and short writes are logged at debug level.
 * NOTE(review): the return statements are not shown; presumably the function
 * returns whether the transfer succeeded -- confirm.
 */
1042 static bool copy(int src, int dst)
1047 sz1 = read(src, buf, sizeof(buf));
1051 DEBUG_LOG("read from fd %d: %m\n", src);
1057 sz2 = write(dst, buf, sz1);
1059 DEBUG_LOG("write to fd %d: %m\n", dst);
1063 DEBUG_LOG("short write to fd %d\n", dst);
1070 * The function returns only when the process receives SIGCHLD. In this case
1071 * the return value is 0 for success, 1 for failure, and 2 if the child's exit
1072 * code indicates a reboot request. Other signals are pushed down to the child
/*
 * The main loop of the parent process.
 *
 * pid: the pid of the child process, which is passed to waitpid().
 * c: the container. cr: its runtime state.
 *
 * Each iteration waits with select() for the signal pipe, the unix socket,
 * the fds of attached clients and the pty masters, and in foreground mode
 * also for stdin. Socket requests are handed to dispatch_socket_request().
 * Output of the first pty is copied to stdout, output of the other ptys is
 * discarded. In foreground mode stdin is copied to the first pty.
 */
1075 static int parent_loop(pid_t pid, const struct container *c,
1076 struct container_runtime *cr)
1080 init_signal_handling();
1082 int sig, max_fileno = 0;
1086 if (OPT_GIVEN(START, FOREGROUND)) {
1087 FD_SET(STDIN_FILENO, &fds);
1088 if (STDIN_FILENO > max_fileno)
1089 max_fileno = STDIN_FILENO;
1091 FD_SET(signal_pipe[0], &fds);
1092 if (signal_pipe[0] > max_fileno)
1093 max_fileno = signal_pipe[0];
1094 FD_SET(cr->socket_fd, &fds);
1095 if (cr->socket_fd > max_fileno)
1096 max_fileno = cr->socket_fd;
1097 for (n = 0; n < cr->num_ttys; n++) {
1098 if (cr->client[n] >= 0) { /* a client is attached */
1099 FD_SET(cr->client[n], &fds);
1100 if (cr->client[n] > max_fileno)
1101 max_fileno = cr->client[n];
1103 FD_SET(cr->master[n], &fds);
1104 if (cr->master[n] > max_fileno)
1105 max_fileno = cr->master[n];
1108 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1110 ERROR_LOG("select: %m\n");
1114 if (!FD_ISSET(signal_pipe[0], &fds))
1116 sig = next_signal();
1117 if (sig == SIGCHLD) {
1119 if (waitpid(pid, &wstatus, WNOHANG) < 0) {
1120 WARNING_LOG("wait: %m\n");
1124 if (!WIFEXITED(wstatus))
/* An exit code of two is treated specially, see the comment above. */
1126 if (WEXITSTATUS(wstatus) == 2)
1128 return WEXITSTATUS(wstatus) != EXIT_SUCCESS;
1132 if (FD_ISSET(cr->socket_fd, &fds))
1133 dispatch_socket_request(cr);
1134 for (n = 0; n < cr->num_ttys; n++) {
1135 if (cr->client[n] >= 0) {
/*
 * The condition is parenthesized explicitly so that the code does not depend
 * on FD_ISSET() expanding to a parenthesized expression.
 */
1136 if (FD_ISSET(cr->client[n], &fds))
1137 dispatch_client(cr->client + n);
1138 } else { /* stdout is /dev/null in background mode */
1139 if (FD_ISSET(cr->master[n], &fds))
1140 copy(cr->master[n], n == 0?
1141 STDOUT_FILENO : -1);
1144 if (OPT_GIVEN(START, FOREGROUND)) {
1145 if (FD_ISSET(STDIN_FILENO, &fds))
1146 copy(STDIN_FILENO, cr->master[0]);
1151 /* Set net namespace of child and call parent_loop(). */
/*
 * child_pid: pid of the forked child. c: the container. cr: runtime state.
 *
 * The function reads the pid of the init process from pipe1, calls
 * set_netns() for the peer device of each interface spec, and writes one byte
 * to pipe2 as the signal to proceed. The pipe ends are closed on the error
 * paths as well. The return value on success is that of parent_loop().
 */
1152 static int run_parent(pid_t child_pid, const struct container *c,
1153 struct container_runtime *cr)
/* Close the pipe ends which are used by the other side only. */
1158 close(cr->pipe1[1]);
1159 close(cr->pipe2[0]);
1160 if (read(cr->pipe1[0], &cr->init_pid, 4) != 4) {
1161 ERROR_LOG("pipe1 read error\n");
1162 close(cr->pipe1[0]);
1163 close(cr->pipe2[1]);
1166 INFO_LOG("received grand child pid: %u\n", (unsigned)cr->init_pid);
1167 close(cr->pipe1[0]);
1168 for (n = 0; n < c->num_ifspecs; n++) {
1169 char *peer = interface_name(c, n, true);
1170 success = set_netns(peer, child_pid);
1173 ERROR_LOG("set_netns error\n");
1174 close(cr->pipe2[1]);
1178 success = write(cr->pipe2[1], "\0", 1) == 1;
1179 close(cr->pipe2[1]);
1181 ERROR_LOG("pipe2 write error\n");
1184 return parent_loop(child_pid, c, cr);
/*
 * Determine the capabilities to drop for a container.
 *
 * Three sources exist: the list of the container, the list built from the
 * default capdrop option (used if that option was given), and the built-in
 * list. The condition for the first source is not shown. The array is
 * returned via result, the return value is the number of elements.
 */
1187 static unsigned get_capdrops(const struct container *c, cap_value_t **result)
1189 static cap_value_t builtin_capdrop[] = {CAP_SYS_MODULE, CAP_SYS_TIME,
1193 *result = c->capdrop;
1194 return c->num_capdrops;
1196 if (OPT_GIVEN(MICOFORIA, DEFAULT_CAPDROP)) {
1197 *result = default_capdrop;
1198 return num_default_capdrops;
1200 *result = builtin_capdrop;
1201 return ARRAY_SIZE(builtin_capdrop);
/*
 * Remove all capabilities returned by get_capdrops() from the capability
 * bounding set of the calling process. A failure to drop a capability is
 * fatal.
 */
1204 static void drop_caps(const struct container *c)
1206 cap_value_t *capdrop;
1207 unsigned n, num_capdrops;
1209 INFO_LOG("lowering bounding set capabilities\n");
1210 num_capdrops = get_capdrops(c, &capdrop);
1211 for (n = 0; n < num_capdrops; n++) {
/* The name is only needed for the log message. */
1212 char *name = cap_to_name(capdrop[n]);
1213 DEBUG_LOG("dropping %s\n", name);
1215 if (cap_drop_bound(capdrop[n]) < 0)
1216 die_errno("cap_drop_bound");
/*
 * The main loop of the child process, which supervises the init process.
 *
 * pid: the pid of the init process. cr: runtime state of the container.
 *
 * The loop waits for signals. On SIGCHLD the init process is reaped and the
 * console is shut down. An init process which was terminated by signal
 * number one counts as a reboot request. Of the other signals, SIGINT is
 * forwarded to the init process as is, everything else results in SIGKILL.
 */
1220 __attribute ((noreturn))
1221 static void child_loop(pid_t pid, struct container_runtime *cr)
1225 INFO_LOG("parent: %u, child: %u, init: %u\n", (unsigned) getppid(),
1226 (unsigned)getpid(), (unsigned)pid);
1227 init_signal_handling();
1235 FD_SET(signal_pipe[0], &fds);
1236 if (signal_pipe[0] > max_fileno)
1237 max_fileno = signal_pipe[0];
1238 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1240 ERROR_LOG("select: %m\n");
1243 do { if (FD_ISSET(signal_pipe[0], &fds)) {
1244 int sig = next_signal();
1245 if (sig == SIGCHLD) {
1246 if (waitpid(pid, &wstatus, WNOHANG) < 0) {
1247 WARNING_LOG("wait: %m\n");
1250 shutdown_console(cr);
1251 if (WIFSIGNALED(wstatus) &&
1252 WTERMSIG(wstatus) == 1) {
1253 NOTICE_LOG("reboot requested\n");
1256 NOTICE_LOG("container terminated\n");
1259 NOTICE_LOG("sending signal %d to container init\n",
1261 kill(pid, sig == SIGINT? SIGINT : SIGKILL);
/*
 * Return the path of the init program of the container, or the value of the
 * default init option if the container does not specify one. The returned
 * string is not a copy.
 */
1266 static const char *get_init_path(const struct container *c)
1268 return c->init? c->init : OPT_STRING_VAL(MICOFORIA, DEFAULT_INIT);
1272 * The child process unshares namespaces, spawns the init process which runs
1273 * the pre-exec hook and executes the container init process. This function
1274 * never returns, but both the child and the init process exit when the
1275 * container terminates. The exit code of the child tells the parent whether
1276 * it should restart the container.
/*
 * c: the container to start. cr: its runtime state.
 *
 * Sequence of events: the fds which are only used by the parent are closed,
 * the network and pid namespaces are unshared, and the process forks. One
 * side of the fork reports the pid of the new process through pipe1 and
 * enters child_loop(). The other side waits for one byte on pipe2, unshares
 * the mount, IPC and UTS namespaces, sets the hostname, changes to the root
 * directory, renames the network interfaces to eth0, eth1, ..., runs the
 * pre-exec hook, pivots into the new root and executes the init program.
 */
1278 __attribute ((noreturn))
1279 static void run_child(const struct container *c, struct container_runtime *cr)
1282 char *init, *put_old;
1286 close(cr->socket_fd);
1287 for (n = 0; n < cr->num_ttys; n++)
1288 close(cr->master[n]);
1289 close(cr->pipe1[0]);
1290 close(cr->pipe2[1]);
1291 if (unshare(CLONE_NEWNET) < 0)
1292 die_errno("unshare net ns\n");
1293 if (unshare(CLONE_NEWPID) < 0)
1294 die_errno("unshare pid ns\n");
1295 /* fork again to become pid 1 in the new pid namespace */
1296 if ((pid = fork()) < 0)
1299 * By writing to pipe1 we tell the parent (a) we've unshared the net
1300 * namespace, and (b) the pid of the init process in the parent
1304 close(cr->pipe2[0]);
1305 if (write(cr->pipe1[1], (const char *)&pid, 4) != 4)
1306 die_errno("pipe write error");
1307 close(cr->pipe1[1]);
1308 child_loop(pid, cr); /* never returns */
1311 DEBUG_LOG("now running as pid %d\n", pid);
1312 if (read(cr->pipe2[0], &ch, 1) != 1)
1313 die_errno("pipe read error");
1314 close(cr->pipe1[1]);
1315 close(cr->pipe2[0]);
1316 if (unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS) < 0)
1317 die_errno("unshare");
1318 mkdir(cr->dev, 0777);
1320 for (n = 0; n < cr->num_ttys; n++)
1321 close(cr->slave[n]);
1322 INFO_LOG("setting hostname to %s\n", c->name);
1323 if (sethostname(c->name, strlen(c->name)) < 0)
1324 die_errno("sethostname error");
1325 if (chdir(cr->root) < 0)
1326 die_errno("chdir %s", cr->root);
1330 apply_memory_limit(c);
/* Inside the container the interfaces are called eth0, eth1, ... */
1332 for (n = 0; n < c->num_ifspecs; n++) {
1333 char *peer = interface_name(c, n, true);
1334 char *renamed = msg("eth%u", n);
1335 if (!rename_interface(peer, renamed))
1336 die("can not rename %s to %s\n", peer, renamed);
1340 run_pre_exec_hook(c);
1341 setup_termios(STDIN_FILENO);
1342 put_old = msg("%s/mnt", cr->root);
1343 /* glibc does not provide a wrapper for pivot_root */
1344 if (syscall(SYS_pivot_root, ".", put_old) < 0)
1345 die_errno("pivot_root (put_old: %s)", put_old);
1346 if (umount2("/mnt", MNT_DETACH) < 0)
1347 die_errno("umount %s", put_old);
1349 close(STDIN_FILENO);
1350 init = xstrdup(get_init_path(c));
1351 INFO_LOG("handing over control to container init: %s\n", init);
1352 execve(init, (char *[]){init, NULL}, NULL);
/*
 * Report the path which was passed to execve(). c->init is NULL if the
 * container relies on the default init option.
 */
1353 die_errno("failed to exec init process %s", init);
1357 * We need three processes, called parent, child, init, because we want one
1358 * process run with namespaces unmodified, requiring one fork. After the child
1359 * has unshared its PID namespace, it keeps its old PID, so we need to fork
1360 * again to get pid 1. The child can not terminate because the parent can not
1361 * wait(2) on its grandchild.
/*
 * Prepare the runtime state of one container and run it.
 *
 * Visible steps: create the cgroup, listen on the unix socket of the
 * container, determine root and dev paths, open one pty pair per configured
 * tty, remount the root directory, set up networking, run the pre-start
 * hook, create the two startup pipes and fork. run_child() is called on one
 * side of the fork and never returns; run_parent() receives the pid that
 * fork() returned.
 *
 * NOTE(review): error paths, cleanup, the restart logic and the return
 * statement are not visible in this view.
 */
1363 static bool exec_container(const struct container *c)
1368 struct container_runtime cr = {0};
1372 create_cgroup_v2(c);
1373 socket_path = get_socket_path(c->name);
1374 success = listen_on_unix_socket(socket_path, &cr.socket_fd);
1376 ERROR_LOG("can not listen on unix socket %s\n", socket_path);
1380 cr.root = get_root_dir(c);
1381 cr.dev = msg("%s/dev", cr.root);
/* Resolve the path behind file descriptor 0 of this process. */
1382 cr.pts = realpath("/proc/self/fd/0", NULL);
1383 DEBUG_LOG("pts: %s\n", cr.pts);
/* One master, slave and client descriptor slot per container tty. */
1384 cr.num_ttys = get_container_ttys(c, &cr.tty);
1385 cr.master = xmalloc(cr.num_ttys * sizeof(int));
1386 cr.slave = xmalloc(cr.num_ttys * sizeof(int));
1387 cr.client = xmalloc(cr.num_ttys * sizeof(int));
1388 for (n = 0; n < cr.num_ttys; n++)
1391 NOTICE_LOG("starting %s\n", c->name);
1392 for (n = 0; n < cr.num_ttys; n++) {
1393 if (openpty(cr.master + n, cr.slave + n, NULL, NULL, NULL) < 0)
1395 DEBUG_LOG("pty (tty%u <-> %s)\n", n, ttyname(cr.slave[n]));
1397 /* mount rw, ignore errors */
1398 mount(NULL, cr.root, NULL, MS_REMOUNT, NULL);
1399 if (!setup_network(c))
1401 if (!run_pre_start_hook(c))
1403 if (pipe(cr.pipe1) < 0) /* child -> parent */
1405 if (pipe(cr.pipe2) < 0)
1406 die_errno("pipe2"); /* parent -> child */
1407 if ((pid = fork()) < 0)
1410 run_child(c, &cr); /* never returns */
1411 ret = run_parent(pid, c, &cr);
1414 NOTICE_LOG("rebooting\n");
1415 for (n = 0; n < cr.num_ttys; n++) {
1416 close(cr.master[n]);
/*
 * Build the path of the log file of the named container: the value of the
 * logdir option and the container name, joined by a slash.
 * NOTE(review): msg() presumably returns an allocated string which the
 * caller has to free -- confirm.
 */
1422 static char *get_container_logfile(const char *name)
1424 return msg("%s/%s", OPT_STRING_VAL(MICOFORIA, LOGDIR), name);
/*
 * Start one container.
 *
 * An error is logged if the container is already locked by another process.
 * In foreground mode both stdin and stdout must be terminals; the terminal
 * attributes of stdin are saved in tios up front and restored after
 * exec_container() has returned. The process which runs exec_container()
 * takes the container lock first and terminates through exit() with a
 * status that reflects the result of exec_container().
 *
 * NOTE(review): the lines around fork() and the use of logfile are not
 * visible here; presumably the non-foreground case forks and redirects
 * output to the log file -- confirm against the full source.
 */
1427 static bool start_container(const struct container *c)
1431 struct termios tios;
1434 if (is_locked(c->name, &pid)) {
1435 ERROR_LOG("%s is locked by pid %u\n", c->name, (unsigned)pid);
1438 if (OPT_GIVEN(START, FOREGROUND)) {
1439 if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) {
1440 ERROR_LOG("both stdin and stdout must be terminals\n");
1443 if (tcgetattr(STDIN_FILENO, &tios) < 0) {
1444 ERROR_LOG("tcgetattr: %m\n");
1448 if ((pid = fork()) < 0)
1452 logfile = get_container_logfile(c->name);
/* Losing the race for the lock at this point is fatal. */
1456 if (!try_lock(c->name, &pid))
1457 die("%s is locked by pid %u", c->name, (unsigned)pid);
1458 success = exec_container(c);
1459 if (OPT_GIVEN(START, FOREGROUND)) {
1460 if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios) < 0)
1461 ERROR_LOG("tcsetattr: %m\n");
1463 exit(success? EXIT_SUCCESS : EXIT_FAILURE);
/*
 * Validate the non-option arguments of the start subcommand.
 *
 * Without arguments, at least one container must be configured, and
 * foreground mode requires that no more than one container is configured.
 * With arguments, foreground mode accepts only a single argument, and each
 * argument is looked up with get_container().
 * NOTE(review): the test which guards the final die() call is not visible;
 * presumably it triggers when get_container() finds no match.
 */
1466 static void check_container_args(void)
1468 unsigned n, num_inputs;
1469 struct container *c;
1471 num_inputs = lls_num_inputs(sublpr);
1472 if (num_inputs == 0) {
1473 if (num_containers == 0)
1474 die("no container configured\n");
1475 if (OPT_GIVEN(START, FOREGROUND) && num_containers > 1)
1476 die("must specify container for foreground mode");
1478 if (OPT_GIVEN(START, FOREGROUND) && num_inputs > 1)
1479 die("can start only one container in foreground mode");
1480 for (n = 0; n < num_inputs; n++) {
1481 const char *name = lls_input(n, sublpr);
1482 c = get_container(name);
1484 die("container not configured: %s", name);
/*
 * Iterator state for walking the containers selected by the arguments of
 * the current subcommand, see cai_next(). The members are not visible in
 * this view; cai_next() uses an idx member as position.
 */
1489 struct container_arg_iter {
/* Initializer which sets idx to zero. The macro argument is not used. */
1493 #define INITIALIZED_CAI(_cai) {.idx = 0}
/*
 * Advance the iterator and return the next selected container.
 *
 * If the subcommand received no arguments, the configured containers are
 * returned one by one in the order of the container array. Otherwise each
 * argument is looked up by name, and an error is logged for names which are
 * not configured.
 * NOTE(review): the end-of-iteration return value and the way *skipped is
 * set are not visible in this view.
 */
1495 static struct container *cai_next(struct container_arg_iter *cai, bool *skipped)
1497 unsigned num_inputs = lls_num_inputs(sublpr);
1501 if (num_inputs == 0) {
1502 if (cai->idx >= num_containers)
1504 return container[cai->idx++];
1506 for (; cai->idx < num_inputs; cai->idx++) {
1507 const char *name = lls_input(cai->idx, sublpr);
1508 struct container *c = get_container(name);
1510 ERROR_LOG("%s: not configured\n", name);
/*
 * Call f for every container delivered by cai_next().
 *
 * success starts out as true. The statement executed when f fails or an
 * argument was skipped is not visible here; presumably it clears success
 * so that the function reports failure -- confirm.
 */
1521 static bool for_each_container_arg(bool (*f)(const struct container *c))
1523 struct container *c;
1524 bool success = true;
1526 struct container_arg_iter cai = INITIALIZED_CAI(cai);
1528 while ((c = cai_next(&cai, &skipped)))
1529 if (!f(c) || skipped)
/*
 * Handler of the start subcommand.
 *
 * Validates the container arguments, insists on a non-empty logdir option,
 * creates the log directory if it does not exist yet and then calls
 * start_container() for each selected container.
 * Returns the result of for_each_container_arg().
 */
1534 static bool com_start(void)
1536 const char *logdir = OPT_STRING_VAL(MICOFORIA, LOGDIR);
1538 check_container_args();
1539 if (logdir[0] == '\0')
/* Name the option correctly in the diagnostic (it was misspelled). */
1540 die_empty_arg("logdir");
/* An already existing log directory is not an error. */
1542 if (mkdir(logdir, 0777) < 0 && errno != EEXIST)
1543 die_errno("mkdir %s", logdir);
1544 return for_each_container_arg(start_container);
1546 EXPORT_CMD_HANDLER(start);
/*
 * Send signal signum to the process which holds the lock of container c.
 *
 * If the container is not locked, this is logged at info level. A failing
 * kill() is logged as an error.
 * NOTE(review): the return statements are not visible in this view.
 */
1548 static bool send_signal_to_container(int signum, const struct container *c)
1553 if (!is_locked(c->name, &pid)) {
1554 INFO_LOG("%s is not running\n", c->name);
1557 DEBUG_LOG("sending signal %d to pid %u\n", signum, (unsigned)pid);
1558 success = kill(pid, signum) >= 0;
1560 ERROR_LOG("kill %s: %m\n", c->name);
/*
 * Provide a small set of environment variables suitable for root.
 *
 * The current value of TERM is fetched first and passed back to setenv().
 * All setenv() calls pass zero as overwrite flag, so a variable which is
 * set at the time of the call keeps its value.
 * NOTE(review): the lines between getenv() and the first setenv() are not
 * visible; presumably the environment is cleared there -- confirm.
 */
1564 static void clean_env(void)
1566 char *term = getenv("TERM");
1570 setenv("TERM", term, 0);
1571 setenv("PATH", "/root/bin:/usr/local/sbin:/usr/local/bin"
1572 ":/sbin:/usr/sbin:/bin:/usr/bin", 0);
1573 setenv("USER", "root", 0);
1574 setenv("LOGNAME", "root", 0);
1575 setenv("HOME", "/root", 0);
/*
 * Query the pid of the init process of the named container.
 *
 * Sends the init_pid request to the socket of the container via
 * request_int(), which stores the answer in *result. An error message is
 * logged if the pid could not be determined.
 * NOTE(review): cleanup of socket_path and the return statement are not
 * visible in this view.
 */
1578 static bool request_init_pid(const char *name, int *result)
1580 char *socket_path = get_socket_path(name);
1584 success = request_int(socket_path, "init_pid", result);
1587 ERROR_LOG("could not determine init pid of %s\n", name);
/*
 * Shut down container c by executing halt through nsenter, targeting the
 * pid of the container init process.
 *
 * argv refers to the buffer str, which is filled with the decimal init pid
 * right before execvp(). A container which is not locked is treated as not
 * running.
 * NOTE(review): the _exit() calls suggest that the request and the exec
 * happen in a forked process; the fork and the return statements are not
 * visible in this view.
 */
1591 static bool shutdown_container(const struct container *c)
1595 char *argv[] = {"nsenter", "-w", "-a", "-r", "-t", str, "halt", NULL};
1597 if (!is_locked(c->name, NULL)) {
1598 if (lls_num_inputs(sublpr) == 0)
1600 ERROR_LOG("container not running: %s\n", c->name);
1608 if (!request_init_pid(c->name, &pid))
1609 _exit(EXIT_FAILURE);
1610 sprintf(str, "%d", pid);
1612 execvp(argv[0], argv);
/* Only reached if execvp() failed. */
1613 _exit(EXIT_FAILURE);
/* Return true if nobody holds the lock of container c. */
1616 static bool container_is_dead(const struct container *c)
1618 return !is_locked(c->name, NULL);
/*
 * Poll until container_is_dead() holds for all selected containers.
 *
 * Each round sleeps for ms milliseconds and then checks all containers.
 * The loop runs only while ms is below 20000.
 * NOTE(review): the initial value of ms, its update between rounds and the
 * return statements are not visible in this view.
 */
1621 static bool wait_for_containers_to_die(void)
1627 while (ms < 20000) {
/* Split the millisecond count into seconds and nanoseconds. */
1628 ts.tv_sec = ms / 1000;
1629 ts.tv_nsec = (ms % 1000) * 1000 * 1000;
1630 if (nanosleep(&ts, NULL) < 0)
1632 success = for_each_container_arg(container_is_dead);
/*
 * Handler of the stop subcommand: call shutdown_container() for each
 * selected container. If the wait option was given, the result of
 * wait_for_containers_to_die() is returned.
 * NOTE(review): the return values of the other paths are not visible.
 */
1640 static bool com_stop(void)
1642 bool success = for_each_container_arg(shutdown_container);
1646 if (!OPT_GIVEN(STOP, WAIT))
1648 return wait_for_containers_to_die();
1650 EXPORT_CMD_HANDLER(stop);
/* Request a reboot of container c by sending SIGINT to the lock holder. */
1652 static bool reboot_container(const struct container *c)
1654 return send_signal_to_container(SIGINT, c);
/* Handler of the reboot subcommand: reboot each selected container. */
1657 static bool com_reboot(void)
1659 return for_each_container_arg(reboot_container);
1661 EXPORT_CMD_HANDLER(reboot);
/* Kill container c by sending SIGUSR1 to the process holding its lock. */
1663 static bool kill_container(const struct container *c)
1665 return send_signal_to_container(SIGUSR1, c);
/*
 * Handler of the kill subcommand: call kill_container() for each selected
 * container. If the wait option was given, the result of
 * wait_for_containers_to_die() is returned.
 * NOTE(review): the return values of the other paths are not visible.
 */
1668 static bool com_kill(void)
1670 bool success = for_each_container_arg(kill_container);
1674 if (!OPT_GIVEN(KILL, WAIT))
1676 return wait_for_containers_to_die();
1678 EXPORT_CMD_HANDLER(kill);
/*
 * Print the effective configuration of container c to stdout: hooks, root
 * directory, init path, network interfaces with hardware addresses, device
 * access entries, io specs, CPU core limit, memory limit, dropped
 * capabilities and ttys. Every line except the name is indented by a tab.
 * NOTE(review): the statements which release the looked-up values are not
 * visible in this view.
 */
1680 static void list_container_verbose(const struct container *c)
1685 cap_value_t *capdrop;
/* Shown as is unless overwritten with the number of cores below. */
1687 char cores_str[25] = "unlimited";
1688 unsigned cores = get_cpu_cores(c);
1690 printf("%s:\n", c->name);
1691 printf("\tpre-start hook: %s\n", get_pre_start_hook(c));
1692 printf("\tpre-exec hook: %s\n", get_pre_exec_hook(c));
1693 root = get_root_dir(c);
1694 printf("\troot dir: %s\n", root);
1696 printf("\tinit path: %s\n", get_init_path(c));
1697 for (n = 0; n < c->num_ifspecs; n++) {
1698 char pretty_hwaddr[18];
1699 char *iface = interface_name(c, n, false);
1700 pretty_print_hwaddr(c->ifspec[n].hwaddr, pretty_hwaddr);
1701 printf("\tinterface #%u: %s (%s)\n", n, iface, pretty_hwaddr);
/* The first character of each entry selects allow or deny. */
1704 N = get_dacl(c, &word_list);
1705 for (n = 0; n < N; n++)
1706 printf("\tdac entry #%u: %s %s\n", n, word_list[n][0] == 'a'?
1707 "allow" : "deny", word_list[n] + 1);
1708 N = get_iospecs(c, &word_list);
1709 for (n = 0; n < N; n++)
1710 printf("\tiospec #%u: %s\n", n, word_list[n]);
1712 sprintf(cores_str, "%u", cores);
1713 printf("\tCPU core limit: %s\n", cores_str);
1714 printf("\tmemory limit: %uG\n", get_memory_limit(c));
1715 N = get_capdrops(c, &capdrop);
1716 for (n = 0; n < N; n++)
1717 printf("\tcapdrop #%u: %s\n", n, cap_to_name(capdrop[n]));
1718 N = get_container_ttys(c, &tty);
1719 for (n = 0; n < N; n++)
1720 printf("\ttty #%u: %u\n", n, tty[n]);
/*
 * Handler of the ls subcommand: iterate over the selected containers and
 * print them. The verbose option prints the full configuration, the long
 * option prints pid, CPU cores, memory limit and name separated by tabs,
 * and otherwise only the name is printed unless the quiet option is given.
 * NOTE(review): the treatment of containers which are not locked (all
 * option) and the return statement are only partially visible.
 */
1723 static bool com_ls(void)
1725 struct container *c;
1726 bool skipped, success = true;
1727 struct container_arg_iter cai = INITIALIZED_CAI(cai);
1729 while ((c = cai_next(&cai, &skipped))) {
1733 if (!is_locked(c->name, &pid)) {
1734 if (!OPT_GIVEN(LS, ALL)) {
1740 if (OPT_GIVEN(LS, VERBOSE)) {
1741 list_container_verbose(c);
1744 if (OPT_GIVEN(LS, LONG)) {
1746 printf("%u\t", (unsigned)pid);
1749 printf("%u\t", get_cpu_cores(c));
1750 printf("%uG\t", get_memory_limit(c));
1751 printf("%s\n", c->name);
1754 if (!OPT_GIVEN(LS, QUIET))
1755 printf("%s\n", c->name);
1757 if (skipped) /* needed if the last given container arg is invalid */
1761 EXPORT_CMD_HANDLER(ls);
/*
 * Show the process tree of container c by running pstree -anp on a pid.
 *
 * is_locked() stores the pid of the lock holder in pid. Unless the all
 * option of ps is given, pid is then replaced by the init pid obtained from
 * request_init_pid(). argv refers to the buffer str, which receives the
 * decimal pid right before xexec() is called.
 * NOTE(review): the branches taken when the container is not running are
 * only partially visible.
 */
1763 static bool list_container_processes(const struct container *c)
1767 char *argv[] = {"pstree", "-anp", str, NULL};
1770 success = is_locked(c->name, &pid);
1772 if (lls_num_inputs(sublpr) == 0)
1774 ERROR_LOG("container \"%s\" is not running\n", c->name);
1777 if (!OPT_GIVEN(PS, ALL) && !request_init_pid(c->name, &pid))
1779 sprintf(str, "%d", pid);
1780 success = xexec(argv, NULL);
/* Handler of the ps subcommand: list processes of each selected container. */
1784 static bool com_ps(void)
1786 return for_each_container_arg(list_container_processes);
1788 EXPORT_CMD_HANDLER(ps);
/*
 * Handler of the attach subcommand: connect the terminal of the caller to
 * a tty of a running container.
 *
 * Exactly one argument, the container name, is required, and both stdin and
 * stdout must be terminals. The attach or force-attach request, followed by
 * the tty minor number, is sent to the socket of the container and yields
 * the pty master descriptor. Afterwards a select() loop watches stdin, the
 * master and the socket: bytes read from stdin are written to the master
 * and data from the master is copied to stdout. The saved terminal
 * attributes of stdin are restored at the end.
 *
 * NOTE(review): the loop header, the fd_set setup, the actions of the
 * escape handling and all return statements are not visible in this view.
 */
1790 static bool com_attach(void)
1796 int master, ret, socket_fd;
1797 bool have_escape = false;
1798 struct termios tios;
1799 uint32_t minor = OPT_UINT32_VAL(ATTACH, TTY);
1802 if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) {
1803 ERROR_LOG("both stdin and stdout must be terminals\n");
/* Save the terminal attributes so that they can be restored later. */
1806 if (tcgetattr(STDIN_FILENO, &tios) < 0)
1807 die_errno("tcgetattr");
1808 ret = lls_check_arg_count(sublpr, 1, 1, &errctx);
1810 die_lopsub(ret, &errctx);
1811 arg = lls_input(0, sublpr);
1812 if (!is_locked(arg, &pid)) {
1813 ERROR_LOG("container not running: %s\n", arg);
1816 socket_path = get_socket_path(arg);
1817 if (OPT_GIVEN(ATTACH, FORCE))
1818 rq = msg("force-attach %u", minor);
1820 rq = msg("attach %u", minor);
1821 socket_fd = request_fd(socket_path, rq, &master);
1824 INFO_LOG("Attached to /dev/tty%u of container %s\n", minor, arg);
1825 NOTICE_LOG("Type CTRL+a q to quit\n");
1826 setup_termios(STDIN_FILENO);
1827 setup_termios(master);
/* Watch stdin, the pty master and the socket; track the highest fd. */
1832 FD_SET(STDIN_FILENO, &fds);
1833 if (STDIN_FILENO > max_fileno)
1834 max_fileno = STDIN_FILENO;
1835 FD_SET(master, &fds);
1836 if (master > max_fileno)
1837 max_fileno = master;
1838 FD_SET(socket_fd, &fds);
1839 if (socket_fd > max_fileno)
1840 max_fileno = socket_fd;
1841 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1843 ERROR_LOG("select: %m\n");
1846 if (FD_ISSET(socket_fd, &fds))
1848 if (FD_ISSET(STDIN_FILENO, &fds)) {
1850 if (read(STDIN_FILENO, &c, 1) <= 0)
/* Byte value 1 is what CTRL+a produces; q after it is the quit key. */
1852 if (c == 1 && !have_escape)
1854 else if (c == 'q' && have_escape)
1856 else if (write(master, &c, 1) != 1)
1859 if (FD_ISSET(master, &fds)) {
1860 if (!copy(master, STDOUT_FILENO))
1864 if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios) < 0)
1865 ERROR_LOG("tcsetattr: %m\n");
1869 EXPORT_CMD_HANDLER(attach);
/*
 * Handler of the help subcommand.
 *
 * Accepts at most one argument. Without argument the subcommand summary is
 * shown. With an argument, it is looked up as a subcommand name and the
 * long or short help text of that subcommand is printed, depending on the
 * long option.
 * NOTE(review): the tests on ret, the release of the help text and the
 * return statements are not visible in this view.
 */
1871 static bool com_help(void)
1874 char *errctx, *help;
1876 const struct lls_command *cmd;
1878 ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
1880 die_lopsub(ret, &errctx);
1881 if (lls_num_inputs(sublpr) == 0) {
1882 show_subcommand_summary(OPT_GIVEN(HELP, LONG));
1885 arg = lls_input(0, sublpr);
1886 ret = lls_lookup_subcmd(arg, micoforia_suite, &errctx);
1888 die_lopsub(ret, &errctx);
1889 cmd = lls_cmd(ret, micoforia_suite);
1890 if (OPT_GIVEN(HELP, LONG))
1891 help = lls_long_help(cmd);
1893 help = lls_short_help(cmd);
1894 printf("%s\n", help);
1898 EXPORT_CMD_HANDLER(help);
/*
 * Handler of the configtest subcommand. Only prints a confirmation.
 * NOTE(review): presumably reaching this handler implies that option and
 * config parsing already succeeded -- confirm.
 */
1900 static bool com_configtest(void)
1902 printf("Syntax Ok\n");
1905 EXPORT_CMD_HANDLER(configtest);
/*
 * Handler of the edit subcommand: run the editor named by the EDITOR
 * environment variable, or vi if the variable is unset, on the path
 * returned by get_config_file_path(). The result of xexec() is stored in
 * success.
 * NOTE(review): cleanup of conf and the return statement are not visible.
 */
1907 static bool com_edit(void)
1909 char *ed = getenv("EDITOR"); /* must not be freed */
1910 char *conf = get_config_file_path();
1911 char *argv[] = {ed? ed : "vi", conf, NULL};
1912 bool success = xexec(argv, NULL);
1917 EXPORT_CMD_HANDLER(edit);
/*
 * Handler of the enter subcommand: run a command inside a running
 * container by means of nsenter.
 *
 * The first argument names the container. Any further arguments form the
 * command to run; if there are none, the default command from dflt_cmd is
 * used. The argument vector consists of the fixed nsenter arguments, the
 * init pid of the container as argument to -t, the command words and a
 * terminating NULL. If stdin and stdout are terminals, the terminal
 * attributes are fetched before the command runs and written back
 * afterwards.
 *
 * NOTE(review): several lines, among them the assignments to argv[nna],
 * the tty flag and the return statements, are not visible in this view.
 */
1919 static bool com_enter(void)
1923 char *nsenter_args[] = {"nsenter", "-w", "-a", "-r", "-t"};
1924 const unsigned nna = ARRAY_SIZE(nsenter_args); /* num nsenter args */
1925 char *dflt_cmd[] = {"login", "-f", "root"};
1926 unsigned n, N, ni = lls_num_inputs(sublpr);
1927 unsigned nea = ni > 1? ni - 1 : ARRAY_SIZE(dflt_cmd); /* num extra args */
1932 struct termios tios;
1935 ret = lls_check_arg_count(sublpr, 1, INT_MAX, &errctx);
1937 die_lopsub(ret, &errctx);
1938 arg = lls_input(0, sublpr);
1939 if (!is_locked(arg, &pid)) {
1940 ERROR_LOG("container not running: %s\n", arg);
/* Replace the pid of the lock holder by the pid of the container init. */
1943 if (!request_init_pid(arg, &pid))
1945 N = nna + nea + 2; /* +1 for arg to -t and +1 for terminating NULL */
1946 argv = xmalloc(N * sizeof(char *));
1947 for (n = 0; n < nna; n++)
1948 argv[n] = nsenter_args[n];
1949 sprintf(str, "%d", pid);
/* Command words start one slot after the nsenter arguments. */
1951 for (n = 0; n < nea; n++)
1952 argv[nna + 1 + n] = ni > 1? (char *)lls_input(n + 1, sublpr)
1957 if (isatty(STDIN_FILENO) && isatty(STDOUT_FILENO)) {
1958 if (tcgetattr(STDIN_FILENO, &tios) >= 0)
1961 success = xexec(argv, NULL);
1962 if (tty) { /* reset terminal settings */
1964 * First give up the controlling terminal. Without this, the
1965 * command gets SIGSTOP and goes to the background. We ignore
1966 * errors here because nobody cares about a messed up terminal.
1968 ioctl(STDIN_FILENO, TIOCNOTTY);
1969 tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios);
1974 EXPORT_CMD_HANDLER(enter);
/*
 * Handler of the log subcommand: show the log file of a container.
 *
 * Exactly one argument, the container name, is required. The program is
 * less if both stdin and stdout are terminals and cat otherwise. The file
 * name slot of argv is initialized to NULL.
 * NOTE(review): the assignment of the log file path to argv, cleanup and
 * the return statement are not visible in this view.
 */
1976 static bool com_log(void)
1979 char *errctx, *logfile;
1980 bool success, use_less = isatty(STDIN_FILENO) && isatty(STDOUT_FILENO);
1981 char *argv[] = {use_less? "less" : "cat", NULL /* filename */, NULL};
1983 ret = lls_check_arg_count(sublpr, 1, 1, &errctx);
1985 die_lopsub(ret, &errctx);
1986 logfile = get_container_logfile(lls_input(0, sublpr));
1988 success = xexec(argv, NULL);
1992 EXPORT_CMD_HANDLER(log);
/*
 * Program entry point.
 *
 * Parses the options of the supercommand, stores the log level, looks up
 * the subcommand among the remaining arguments, parses the options of the
 * subcommand and calls the handler found in the user data of the
 * subcommand. The exit status reflects the boolean result of the handler.
 * NOTE(review): the tests on ret and some intermediate lines are not
 * visible in this view.
 */
1994 int main(int argc, char *argv[])
1998 const struct micoforia_user_data *ud;
1999 unsigned num_inputs;
2002 parse_options(argc, argv, CMD_PTR(MICOFORIA), &lpr);
2003 loglevel_arg_val = OPT_UINT32_VAL(MICOFORIA, LOGLEVEL);
/* The last num_inputs words of argv are the subcommand and its arguments. */
2005 num_inputs = lls_num_inputs(lpr);
2006 ret = lls_lookup_subcmd(argv[argc - num_inputs], micoforia_suite, &errctx);
2008 die_lopsub(ret, &errctx);
2009 subcmd = lls_cmd(ret, micoforia_suite);
2010 parse_options(num_inputs, argv + argc - num_inputs, subcmd, &sublpr);
2011 ud = lls_user_data(subcmd);
2012 exit(ud->handler()? EXIT_SUCCESS : EXIT_FAILURE);