1 /* SPDX-License-Identifier: GPL-2.0-only */
10 #include <sys/sysmacros.h>
13 #include <sys/socket.h>
14 #include <sys/capability.h>
15 #include <sys/syscall.h>
17 #include "micoforia.lsg.h"
19 static struct lls_parse_result *lpr, *sublpr;
20 unsigned loglevel_arg_val = 4;
33 struct ifspec *ifspec;
34 /* this is never zero, even if no ifspec was given */
37 unsigned num_dac_entries;
39 unsigned num_io_max_entries;
40 /* ~0U: not given, 0: unlimited */
42 unsigned memory_limit;
46 unsigned num_capdrops;
51 static struct container **container;
52 static unsigned num_containers;
54 struct container_runtime {
55 int pipe1[2], pipe2[2]; /* for startup communication */
58 int *master, *slave, *client;
60 int init_pid; /* in the parent namespace */
61 char *pts, *root, *dev;
65 static char **default_dacl, **default_io_max;
66 unsigned num_default_dac_entries, num_default_io_max_entries;
67 static cap_value_t *default_capdrop;
68 unsigned num_default_capdrops;
69 uint32_t *default_tty;
70 unsigned num_default_ttys;
71 static const struct lls_command *subcmd;
72 /* does not allocate memory */
/*
 * printf-style logging to stderr. ll is the severity of the message; it is
 * compared against the global threshold loglevel_arg_val.
 */
73 void m7a_log(int ll, const char* fmt,...)
/*
 * ll is int while loglevel_arg_val is unsigned, so ll is converted to
 * unsigned for this comparison.
 */
77 if (ll < loglevel_arg_val)
/* For the start subcommand, prefix the message with a timestamp and the pid. */
80 if (subcmd == lls_cmd(LSG_MICOFORIA_CMD_START, micoforia_suite)) {
/*
 * NOTE(review): the clock_gettime() call lives inside assert(). When the file
 * is compiled with NDEBUG the call disappears and t is used uninitialized.
 */
84 assert(clock_gettime(CLOCK_REALTIME, &t) == 0);
85 tm = localtime(&t.tv_sec);
86 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
/*
 * tv_nsec / 1000 / 1000 is the millisecond part (at most three digits).
 * NOTE(review): it is printed with %04lu, i.e. padded to four digits.
 */
87 fprintf(stderr, "%s:%04lu ", str,
88 (long unsigned)t.tv_nsec / 1000 / 1000);
89 fprintf(stderr, "(%u) ", (unsigned)getpid());
/* The caller's format string and arguments follow the optional prefix. */
91 vfprintf(stderr, fmt, argp);
/*
 * Report a lopsub error. lopsub_ret is the (negative) return value of a
 * lopsub call; it is negated before being passed to lls_strerror(). Two log
 * variants are visible: one which prefixes the message with *errctx and one
 * without the prefix. Presumably the process exits afterwards, as the name
 * suggests -- TODO confirm, the tail of the function is not visible here.
 */
95 static void die_lopsub(int lopsub_ret, char **errctx)
97 const char *m = lls_strerror(-lopsub_ret);
99 ERROR_LOG("%s: %s\n", *errctx, m);
101 ERROR_LOG("%s\n", m);
/*
 * Loop over the global container[] array. _c is the loop variable of type
 * struct container *, the hidden cursor _cp walks the array of pointers.
 * Presumably the loop stops at the terminating NULL entry which
 * get_or_append_container() maintains -- the loop condition is not visible
 * here, TODO confirm.
 */
107 #define FOR_EACH_CONTAINER(_c) for ( \
108 struct container **_cp = container; \
110 (_cp)++, (_c) = *(_cp) \
/*
 * Look up a container by name: walk all known containers and compare each
 * c->name against name with strcmp().
 */
113 static struct container *get_container(const char *name)
116 FOR_EACH_CONTAINER(c) {
117 if (!strcmp(c->name, name))
/*
 * Find the container called name via get_container(). The visible code below
 * handles the case where a new entry is needed: the global array grows by one
 * slot, a fresh struct container is allocated and named, and the array is
 * NULL-terminated again.
 */
123 static struct container *get_or_append_container(const char *name)
125 struct container *c = get_container(name);
/* Reserve room for num_containers entries plus the terminating NULL. */
128 container = xrealloc(container,
129 (++num_containers + 1) * sizeof(struct container *));
130 c = container[num_containers - 1] = xzmalloc(sizeof(struct container));
131 c->name = xstrdup(name);
132 /* ~0U means: not given */
134 c->memory_limit = ~0U;
136 container[num_containers] = NULL;
/*
 * Determine which tty minors apply to container c. The array is returned via
 * *result, the number of entries is the return value. Per-container ttys
 * (c->num_ttys > 0) are checked first, then the global default_tty[] list.
 * The static dflt (value 1) is presumably the fallback when neither was
 * given -- TODO confirm, that part is not visible here.
 */
140 static unsigned get_container_ttys(const struct container *c, uint32_t **result)
142 static uint32_t dflt = {1};
143 if (c->num_ttys > 0) {
147 if (num_default_ttys > 0) {
148 *result = default_tty;
149 return num_default_ttys;
/*
 * Indices into clo_given_counter[]. parse_options() stores in each slot the
 * OPT_GIVEN() count of the corresponding option right after the command line
 * was parsed. check_options() uses the stored count as the split point when
 * it walks the values of that option in two passes.
 */
155 enum clo_given_counter {
156 CLOGC_DEFAULT_CGROUP_DAC,
158 CLOGC_DEFAULT_IO_MAX,
163 static unsigned clo_given_counter[NUM_CLOGCS];
/*
 * Append the device access control entry arg, converted by
 * parse_cgroup_acl(), to the NULL-terminated string list *listp.
 *
 * The new entry is stored at index *count - 1 and the terminator at *count,
 * so *count must already include the new entry at that point. Presumably it
 * is incremented on a line not visible here -- TODO confirm.
 */
165 static void append_dac_entry(const char *arg, char ***listp, unsigned *count)
167 char *val = parse_cgroup_acl(arg);
169 *listp = xrealloc(*listp, (*count + 1) * sizeof(char *));
170 (*listp)[*count - 1] = val;
171 (*listp)[*count] = NULL;
/*
 * Append a copy of the io.max specification arg to the NULL-terminated string
 * list *listp. Unlike append_dac_entry() the argument is stored verbatim.
 *
 * As there, the indices used below require that *count already counts the
 * new entry -- presumably incremented on a line not visible here, TODO
 * confirm.
 */
174 static void append_io_max_entry(const char *arg, char ***listp, unsigned *count)
177 *listp = xrealloc(*listp, (*count + 1) * sizeof(char *));
178 (*listp)[*count - 1] = xstrdup(arg);
179 (*listp)[*count] = NULL;
/*
 * Translate the parsed options into the global container[] array and the
 * default_* lists. Most per-container options are compound arguments which
 * parse_compound_arg() splits into a container name and a value; the
 * container is created on first mention by get_or_append_container().
 */
182 static void check_options(void)
/* Start with an array that holds a single slot. */
190 container = xzmalloc(sizeof(struct container *));
191 /* loop backwards to let command line opts override config file opts */
/*
 * The backward loops below run from the last given value down to index 0 and
 * stop when n wraps around to ~0U.
 */
192 for (n = OPT_GIVEN(MICOFORIA, CONTAINER) - 1; n != ~0U; n--) {
193 arg = OPT_STRING_VAL_N(n, MICOFORIA, CONTAINER);
195 get_or_append_container(arg);
197 for (n = OPT_GIVEN(MICOFORIA, PRE_START_HOOK) - 1; n != ~0U; n--) {
198 arg = OPT_STRING_VAL_N(n, MICOFORIA, PRE_START_HOOK);
199 parse_compound_arg(arg, "pre-start-hook", &name, &val);
200 c = get_or_append_container(name);
/* A value stored by an earlier iteration is replaced. */
202 free(c->pre_start_hook);
203 c->pre_start_hook = val;
205 for (n = OPT_GIVEN(MICOFORIA, PRE_EXEC_HOOK) - 1; n != ~0U; n--) {
206 arg = OPT_STRING_VAL_N(n, MICOFORIA, PRE_EXEC_HOOK);
207 parse_compound_arg(arg, "pre-exec-hook", &name, &val);
208 c = get_or_append_container(name);
210 free(c->pre_exec_hook);
211 c->pre_exec_hook = val;
/* Per-container capabilities to drop, collected in c->capdrop[]. */
213 for (n = OPT_GIVEN(MICOFORIA, CAPDROP) - 1; n != ~0U; n--) {
215 arg = OPT_STRING_VAL_N(n, MICOFORIA, CAPDROP);
216 parse_compound_arg(arg, "capabilities", &name, &val);
217 c = get_or_append_container(name);
218 if (cap_from_name(val, &cap_val) < 0)
219 die_errno("%s: invalid capability: %s", name, val);
220 c->capdrop = xrealloc(c->capdrop,
221 ++c->num_capdrops * sizeof(cap_value_t));
222 c->capdrop[c->num_capdrops - 1] = cap_val;
/* Default capabilities are plain (non-compound) arguments. */
226 for (n = 0; n < OPT_GIVEN(MICOFORIA, DEFAULT_CAPDROP); n++) {
228 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CAPDROP);
229 if (cap_from_name(arg, &cap_val) < 0)
/*
 * NOTE(review): the name that failed to parse is arg, but the message prints
 * val, which at this point is whatever an earlier loop left behind. Looks
 * like a copy-paste slip; should most likely be arg.
 */
230 die_errno("invalid default capability: %s", val);
231 default_capdrop = xrealloc(default_capdrop,
232 ++num_default_capdrops * sizeof(cap_value_t));
233 default_capdrop[num_default_capdrops - 1] = cap_val;
/* Per-container tty minors, collected in c->tty[]. */
235 for (n = OPT_GIVEN(MICOFORIA, TTY) - 1; n != ~0U; n--) {
237 arg = OPT_STRING_VAL_N(n, MICOFORIA, TTY);
238 parse_compound_arg(arg, "tty", &name, &val);
239 c = get_or_append_container(name);
240 minor = atou32(val, "tty");
242 die("can not capture tty0");
243 c->tty = xrealloc(c->tty, ++c->num_ttys * sizeof(uint32_t));
244 c->tty[c->num_ttys - 1] = minor;
248 for (n = 0; n < OPT_GIVEN(MICOFORIA, DEFAULT_TTY); n++) {
249 uint32_t minor = OPT_UINT32_VAL_N(n, MICOFORIA, DEFAULT_TTY);
251 die("can not capture tty0");
252 default_tty = xrealloc(default_tty,
253 ++num_default_ttys * sizeof(uint32_t));
254 default_tty[num_default_ttys - 1] = minor;
257 for (n = OPT_GIVEN(MICOFORIA, ROOT_DIRECTORY) - 1; n != ~0U ; n--) {
258 arg = OPT_STRING_VAL_N(n, MICOFORIA, ROOT_DIRECTORY);
259 parse_compound_arg(arg, "root-directory", &name, &val);
260 c = get_or_append_container(name);
/* The default and the per-container core counts go through the same range check. */
265 u32 = OPT_UINT32_VAL(MICOFORIA, DEFAULT_CPU_CORES);
266 check_range(u32, 0, 65536, "default-cpu-cores");
267 for (n = OPT_GIVEN(MICOFORIA, CPU_CORES) - 1; n != ~0U ; n--) {
268 arg = OPT_STRING_VAL_N(n, MICOFORIA, CPU_CORES);
269 parse_compound_arg(arg, "cpu-cores", &name, &val);
270 c = get_or_append_container(name);
272 u32 = atou32(val, "cpu-cores");
274 check_range(u32, 0, 65536, "cpu-cores");
/* Memory limits; apply_memory_limit() interprets the number as gigabytes. */
277 u32 = OPT_UINT32_VAL(MICOFORIA, DEFAULT_MEMORY_LIMIT);
278 check_range(u32, 0, 1024 * 1024, "default-memory-limit");
279 for (n = OPT_GIVEN(MICOFORIA, MEMORY_LIMIT) - 1; n != ~0U ; n--) {
280 arg = OPT_STRING_VAL_N(n, MICOFORIA, MEMORY_LIMIT);
281 parse_compound_arg(arg, "memory-limit", &name, &val);
282 c = get_or_append_container(name);
284 u32 = atou32(val, "memory-limit");
286 check_range(u32, 0, 1024 * 1024, "memory-limit");
287 c->memory_limit = u32;
289 for (n = OPT_GIVEN(MICOFORIA, INIT) - 1; n != ~0U ; n--) {
290 arg = OPT_STRING_VAL_N(n, MICOFORIA, INIT);
291 parse_compound_arg(arg, "init", &name, &val);
292 c = get_or_append_container(name);
/* Network interface specifications: one struct ifspec per given value. */
297 for (n = 0; n < OPT_GIVEN(MICOFORIA, NET); n++) {
298 struct ifspec *ifspec;
299 arg = OPT_STRING_VAL_N(n, MICOFORIA, NET);
300 parse_compound_arg(arg, "net", &name, &val);
301 c = get_or_append_container(name);
303 c->ifspec = xrealloc(c->ifspec,
304 ++c->num_ifspecs * sizeof(struct ifspec));
305 ifspec = c->ifspec + c->num_ifspecs - 1;
306 parse_ifspec(val, &ifspec->bridge, ifspec->hwaddr);
/*
 * The four list options below are each processed in two passes split at the
 * count which parse_options() recorded in clo_given_counter[]: first the
 * values at indices m and above, then the values below m.
 */
310 m = clo_given_counter[CLOGC_DEFAULT_CGROUP_DAC];
311 for (n = m; n < OPT_GIVEN(MICOFORIA, DEFAULT_CGROUP_DAC); n++) {
312 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CGROUP_DAC);
313 append_dac_entry(arg, &default_dacl, &num_default_dac_entries);
315 for (n = 0; n < m; n++) {
316 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CGROUP_DAC);
317 append_dac_entry(arg, &default_dacl, &num_default_dac_entries);
319 m = clo_given_counter[CLOGC_CGROUP_DAC];
320 for (n = m; n < OPT_GIVEN(MICOFORIA, CGROUP_DAC); n++) {
321 arg = OPT_STRING_VAL_N(n, MICOFORIA, CGROUP_DAC);
322 parse_compound_arg(arg, "cgroup-dac", &name, &val);
323 c = get_or_append_container(name);
325 append_dac_entry(val, &c->dacl, &c->num_dac_entries);
328 for (n = 0; n < m; n++) {
329 arg = OPT_STRING_VAL_N(n, MICOFORIA, CGROUP_DAC);
330 parse_compound_arg(arg, "cgroup-dac", &name, &val);
331 c = get_or_append_container(name);
333 append_dac_entry(val, &c->dacl, &c->num_dac_entries);
337 m = clo_given_counter[CLOGC_DEFAULT_IO_MAX];
338 for (n = m; n < OPT_GIVEN(MICOFORIA, DEFAULT_IO_MAX); n++) {
339 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_IO_MAX);
340 append_io_max_entry(arg, &default_io_max, &num_default_io_max_entries);
342 for (n = 0; n < m; n++) {
343 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_IO_MAX);
344 append_io_max_entry(arg, &default_io_max, &num_default_io_max_entries);
346 m = clo_given_counter[CLOGC_IO_MAX];
347 for (n = m; n < OPT_GIVEN(MICOFORIA, IO_MAX); n++) {
348 arg = OPT_STRING_VAL_N(n, MICOFORIA, IO_MAX);
349 parse_compound_arg(arg, "io-max", &name, &val);
350 c = get_or_append_container(name);
352 append_io_max_entry(val, &c->io_max, &c->num_io_max_entries);
355 for (n = 0; n < m; n++) {
356 arg = OPT_STRING_VAL_N(n, MICOFORIA, IO_MAX);
357 parse_compound_arg(arg, "io-max", &name, &val);
358 c = get_or_append_container(name);
360 append_io_max_entry(val, &c->io_max, &c->num_io_max_entries);
364 /* init default c->ifspec[] */
/*
 * Containers without any ifspec get a single one: attached to the default
 * bridge and with an all-zero hardware address.
 */
365 FOR_EACH_CONTAINER(c) {
366 if (c->num_ifspecs == 0) {
367 const char *br = OPT_STRING_VAL(MICOFORIA, DEFAULT_BRIDGE);
369 c->ifspec = xmalloc(sizeof(struct ifspec));
370 c->ifspec[0].bridge = xstrdup(br);
371 memset(c->ifspec[0].hwaddr, 0, 6);
/*
 * Print the list of subcommands to stdout. Two output forms are visible:
 * one line per subcommand with name and purpose, and a form which prints
 * only the names. Presumably verbose selects between them -- TODO confirm,
 * the branch itself is not visible here.
 */
377 static void show_subcommand_summary(bool verbose)
/* Build a table of subcommand names by stringifying each list entry. */
381 #define LSG_MICOFORIA_CMD(_name) #_name
382 static const char * const subcommand_names[] = {LSG_MICOFORIA_SUBCOMMANDS NULL};
383 #undef LSG_MICOFORIA_CMD
384 printf("Available subcommands:\n");
386 const struct lls_command *cmd;
/* Subcommand numbers start at 1; the loop ends when lls_cmd() returns NULL. */
387 for (i = 1; (cmd = lls_cmd(i, micoforia_suite)); i++) {
388 const char *purpose = lls_purpose(cmd);
389 const char *name = lls_command_name(cmd);
390 printf("%-12s%s\n", name, purpose);
395 for (i = 0; i < LSG_NUM_MICOFORIA_SUBCOMMANDS; i++) {
/* n accumulates the number of characters printed so far. */
402 n += printf("%s", subcommand_names[i]);
408 const char *GET_VERSION(void);
/*
 * Handle the informational options of the supercommand: print the version
 * and license text if the version option was given, otherwise pick the long
 * or short help text, or show the subcommand summary when no non-option
 * argument was given at all.
 */
409 static void handle_version_and_help(void)
413 if (OPT_GIVEN(MICOFORIA, VERSION)) {
414 printf(PACKAGE " %s\n"
415 "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
416 "License: " LICENSE " <" LICENSE_URL ">.\n"
417 "This is free software: you are free to change and redistribute it.\n"
418 "There is NO WARRANTY, to the extent permitted by law.\n"
420 "Web page: " URL "\n"
421 "Clone URL: " CLONE_URL "\n"
422 "Gitweb: " GITWEB_URL "\n"
423 "Author's Home Page: " HOME_URL "\n"
424 "Send feedback to: " AUTHOR " <" EMAIL ">\n"
/* The detailed help takes precedence over the short help. */
430 if (OPT_GIVEN(MICOFORIA, DETAILED_HELP))
431 help = lls_long_help(CMD_PTR(MICOFORIA));
432 else if (OPT_GIVEN(MICOFORIA, HELP))
433 help = lls_short_help(CMD_PTR(MICOFORIA));
/* No subcommand on the command line. */
434 else if (lls_num_inputs(lpr) == 0) {
435 show_subcommand_summary(true /* verbose */);
439 printf("%s\n", help);
444 static char *get_config_file_path(void)
449 if (OPT_GIVEN(MICOFORIA, CONFIG_FILE))
450 return xstrdup(OPT_STRING_VAL(MICOFORIA, CONFIG_FILE));
451 pw = getpwuid(getuid());
452 home = pw? pw->pw_dir : "/root";
453 return msg("%s/.micoforiarc", home);
/*
 * Parse the command line of cmd into *lprp and combine it with the options
 * found in the config file, if there is one. Any lopsub failure is fatal
 * via die_lopsub().
 */
456 static void parse_options(int argc, char **argv, const struct lls_command *cmd,
457 struct lls_parse_result **lprp)
465 char **cf_argv, *errctx = NULL;
466 const char *subcmd_name;
467 struct lls_parse_result *merged_lpr, *cf_lpr;
469 ret = lls_parse(argc, argv, cmd, lprp, &errctx);
471 die_lopsub(ret, &errctx);
472 handle_version_and_help();
/*
 * Remember how often each list option was given on the command line, before
 * the config file values are merged in. check_options() relies on this.
 */
473 clo_given_counter[CLOGC_DEFAULT_CGROUP_DAC] = OPT_GIVEN(MICOFORIA,
475 clo_given_counter[CLOGC_CGROUP_DAC] = OPT_GIVEN(MICOFORIA, CGROUP_DAC);
476 clo_given_counter[CLOGC_DEFAULT_IO_MAX] =
477 OPT_GIVEN(MICOFORIA, DEFAULT_IO_MAX);
478 clo_given_counter[CLOGC_IO_MAX] = OPT_GIVEN(MICOFORIA, IO_MAX);
479 config_file = get_config_file_path();
480 ret = open(config_file, O_RDONLY);
/*
 * A missing file is an error only if the path was given explicitly. Any
 * other open failure is always fatal.
 */
482 if (errno != ENOENT || OPT_GIVEN(MICOFORIA, CONFIG_FILE))
483 die_errno("can not open config file %s", config_file);
484 /* no config file -- nothing to do */
489 ret = fstat(fd, &statbuf);
491 die_errno("failed to stat config file %s", config_file);
492 sz = statbuf.st_size;
493 if (sz == 0) { /* config file is empty -- nothing to do */
/* Map the file read-only and let lopsub convert it into an argument vector. */
497 map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
498 if (map == MAP_FAILED)
499 die_errno("failed to mmap config file %s", config_file);
/* No subcommand name is passed when parsing for the supercommand. */
500 subcmd_name = (cmd == CMD_PTR(MICOFORIA))? NULL : lls_command_name(cmd);
501 ret = lls_convert_config(map, sz, subcmd_name, &cf_argv,
505 ERROR_LOG("failed to convert config file %s\n", config_file);
506 die_lopsub(ret, &errctx);
509 ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
510 lls_free_argv(cf_argv);
512 die_lopsub(ret, &errctx);
513 /* command line options override config file options */
514 ret = lls_merge(*lprp, cf_lpr, cmd, &merged_lpr, &errctx);
516 die_lopsub(ret, &errctx);
/* Both inputs of the merge are released. */
517 lls_free_parse_result(cf_lpr, cmd);
518 lls_free_parse_result(*lprp, cmd);
526 static const char *get_pre_start_hook(const struct container *c)
528 if (c->pre_start_hook)
529 return c->pre_start_hook;
530 return OPT_STRING_VAL(MICOFORIA, DEFAULT_PRE_START_HOOK);
533 static const char *get_pre_exec_hook(const struct container *c)
535 if (c->pre_exec_hook)
536 return c->pre_exec_hook;
537 return OPT_STRING_VAL(MICOFORIA, DEFAULT_PRE_EXEC_HOOK);
540 static char *get_root_dir(const struct container *c)
543 return xstrdup(c->root_dir);
544 return msg("%s/%s", OPT_STRING_VAL(MICOFORIA, DEFAULT_ROOT_PREFIX), c->name);
/*
 * Build a single string which describes all interface specifications of
 * container c. Each ifspec contributes its six hardware address bytes as
 * colon-separated two-digit hex numbers. The leading %s conversions are fed
 * by arguments on lines not visible here.
 */
547 static char *get_ifspec_string(const struct container *c)
/* check_options() gives every container at least one ifspec. */
552 assert(c->num_ifspecs > 0);
553 for (n = 0; n < c->num_ifspecs; n++) {
554 uint8_t *x = c->ifspec[n].hwaddr;
555 char *tmp = msg("%s%s%s:%02x:%02x:%02x:%02x:%02x:%02x",
559 x[0], x[1], x[2], x[3], x[4], x[5]
567 static char *interface_name(const struct container *c, unsigned idx, bool peer)
569 assert(idx < c->num_ifspecs);
570 if (c->num_ifspecs == 1)
571 return peer? msg("%s-g", c->name) : xstrdup(c->name);
573 return msg("%s-%s-g", c->name, c->ifspec[idx].bridge);
574 return msg("%s-%s", c->name, c->ifspec[idx].bridge);
/*
 * Export the root directory of container c, as computed by get_root_dir(),
 * in the environment variable MICOFORIA_ROOT_DIR. An existing value is
 * overwritten.
 */
577 static void set_m7a_root_dir_env(const struct container *c)
579 char *root = get_root_dir(c);
580 DEBUG_LOG("root dir: %s\n", root);
581 setenv("MICOFORIA_ROOT_DIR", root, 1);
/*
 * Run the pre-start hook of container c through /bin/sh -c. While the hook
 * runs, the environment contains MICOFORIA_CONTAINER_NAME,
 * MICOFORIA_ROOT_DIR and MICOFORIA_IFSPECS; all three are removed again
 * afterwards. The result of xexec() is kept in success, which is presumably
 * also the return value -- TODO confirm.
 */
585 static bool run_pre_start_hook(const struct container *c)
588 char *cmd = xstrdup(get_pre_start_hook(c));
589 char *argv[] = {"/bin/sh", "-c", cmd, NULL};
592 setenv("MICOFORIA_CONTAINER_NAME", c->name, 1);
593 set_m7a_root_dir_env(c);
595 ifspec = get_ifspec_string(c);
596 DEBUG_LOG("ifspecs: %s\n", ifspec);
597 setenv("MICOFORIA_IFSPECS", ifspec, 1);
600 INFO_LOG("running pre-start hook %s\n", cmd);
601 success = xexec(argv, NULL);
604 ERROR_LOG("pre-start hook failed\n");
/* Do not leak the hook environment into whatever runs next. */
605 unsetenv("MICOFORIA_CONTAINER_NAME");
606 unsetenv("MICOFORIA_IFSPECS");
607 unsetenv("MICOFORIA_ROOT_DIR");
/*
 * Run the pre-exec hook of container c through /bin/sh -c with
 * MICOFORIA_ROOT_DIR set in the environment. Unlike the pre-start hook, a
 * failure of this hook is fatal.
 */
611 static void run_pre_exec_hook(const struct container *c)
613 char *cmd = xstrdup(get_pre_exec_hook(c));
614 char *argv[] = {"/bin/sh", "-c", cmd, NULL};
616 INFO_LOG("/bin/sh -c '%s'\n", cmd);
617 set_m7a_root_dir_env(c);
618 if (!xexec(argv, NULL))
619 die("%s: pre-exec hook failed", c->name);
621 unsetenv("MICOFORIA_ROOT_DIR");
/*
 * Write the string txt to the cgroup control file path. The file must exist
 * already since it is opened without O_CREAT. Failure to open the file and
 * writing anything other than sz bytes are both fatal.
 */
624 static void write_cgroup(const char *path, const char *txt)
629 if ((fd = open(path, O_WRONLY)) < 0)
630 die_errno("open %s", path);
/* sz is presumably strlen(txt) -- its initialization is not visible here. */
632 if (write(fd, txt, sz) != sz)
633 die_errno("could not write to %s", path);
/*
 * Determine the device access control list for container c. The list is
 * returned via *result, the number of entries is the return value. The
 * per-container list is checked first, then the global default list; dflt
 * is the built-in list.
 *
 * Entry format, as consumed by apply_dacl(): the first character selects the
 * control file ('a': devices.allow, anything else: devices.deny), the rest
 * of the string is the text written to that file.
 */
637 static unsigned get_dacl(const struct container *c, char ***result)
639 static char *dflt[] = {
640 "da", /* deny access to all devices except the ones below */
641 "ac 1:3 rwm", /* null */
642 "ac 1:5 rwm", /* zero */
643 "ac 1:7 rwm", /* full */
644 "ac 1:8 rwm", /* random */
645 "ac 1:9 rwm", /* urandom */
646 "ac 4:* rwm", /* tty?* */
647 "ac 5:0 rwm", /* tty */
648 "ac 5:2 rwm", /* ptmx */
649 "ac 136:* rwm", /* pts */
651 if (c->num_dac_entries > 0) {
653 return c->num_dac_entries;
655 if (num_default_dac_entries > 0) {
656 *result = default_dacl;
657 return num_default_dac_entries;
660 return ARRAY_SIZE(dflt);
/*
 * Create the device cgroup /var/cgroup/micoforia/<name> for container c,
 * write all access control entries returned by get_dacl() to its
 * devices.allow and devices.deny files, and finally move the calling process
 * into the group by writing its pid to cgroup.procs. All failures are fatal.
 */
663 static void apply_dacl(const struct container *c)
666 unsigned n, num_entries;
667 char *m7a_dir, *container_dir, *allow, *deny, *procs, *txt;
668 int fd, allow_fd, deny_fd;
671 m7a_dir = msg("/var/cgroup/micoforia");
672 container_dir = msg("%s/%s", m7a_dir, c->name);
673 allow = msg("%s/devices.allow", container_dir);
674 deny = msg("%s/devices.deny", container_dir);
675 procs = msg("%s/cgroup.procs", container_dir);
/* Directories which exist already are fine. */
677 if (mkdir(m7a_dir, 0777) < 0 && errno != EEXIST)
678 die_errno("mkdir %s", m7a_dir);
680 if (mkdir(container_dir, 0777) < 0 && errno != EEXIST)
681 die_errno("mkdir %s", container_dir);
683 if ((allow_fd = open(allow, O_WRONLY)) < 0)
684 die_errno("open %s", allow);
686 if ((deny_fd = open(deny, O_WRONLY)) < 0)
687 die_errno("open %s", deny);
690 num_entries = get_dacl(c, &dacl);
691 INFO_LOG("applying %u entr%s\n", num_entries, num_entries == 1?
693 for (n = 0; n < num_entries; n++) {
694 char *entry = dacl[n];
695 DEBUG_LOG("dac entry #%u: %s %s\n", n, dacl[n][0] == 'a'?
696 "allow" : "deny", dacl[n] + 1);
/* The selector character is stripped; each entry is written as its own line. */
697 txt = msg("%s\n", entry + 1);
699 fd = entry[0] == 'a'? allow_fd : deny_fd;
700 if (write(fd, txt, sz) != sz)
701 die_errno("could not write to cgroup devices.%s file",
702 entry[0] == 'a'? "allow" : "deny");
/* Enter the cgroup only after all entries have been written. */
707 txt = msg("%u\n", (unsigned)getpid());
708 write_cgroup(procs, txt);
/*
 * Check that both cgroup hierarchies are mounted where micoforia expects
 * them: version 1 at /var/cgroup and version 2 at /var/cgroup2. Then enable
 * the cpu, memory and io controllers for the children of the v2 root group
 * and of the micoforia group below it. The micoforia group is created if it
 * does not exist yet. All failures are fatal.
 */
static void cgroup_init(void)
{
	const char controllers[] = "+cpu +memory +io\n";
	char *m7a_dir, *ctl;

	if (access("/var/cgroup/cgroup.clone_children", F_OK) < 0)
		die("cgroup v1 not mounted at /var/cgroup/");
	/* cgroup.subtree_control exists only in the v2 hierarchy */
	if (access("/var/cgroup2/cgroup.subtree_control", F_OK) < 0)
		die("cgroup v2 not mounted at /var/cgroup2/");
	write_cgroup("/var/cgroup2/cgroup.subtree_control", controllers);
	m7a_dir = msg("/var/cgroup2/micoforia");
	if (mkdir(m7a_dir, 0777) < 0 && errno != EEXIST)
		die_errno("mkdir %s", m7a_dir);
	ctl = msg("%s/cgroup.subtree_control", m7a_dir);
	free(m7a_dir);
	write_cgroup(ctl, controllers);
	free(ctl);
}
/*
 * Create the v2 cgroup /var/cgroup2/micoforia/<name> for container c (an
 * existing directory is fine) and move the calling process into it by
 * writing its pid to cgroup.procs.
 */
731 static void create_cgroup_v2(const struct container *c)
734 char *ctl, *dir = msg("/var/cgroup2/micoforia/%s", c->name);
736 if (mkdir(dir, 0777) < 0 && errno != EEXIST)
737 die_errno("mkdir %s", dir);
738 ctl = msg("%s/cgroup.procs", dir);
/* buf is declared on a line not visible here; it must hold the pid plus newline. */
740 sprintf(buf, "%u\n", (unsigned)getpid());
741 write_cgroup(ctl, buf);
745 static unsigned get_cpu_cores(const struct container *c)
747 return c->cpu_cores != ~0U? c->cpu_cores :
748 OPT_UINT32_VAL(MICOFORIA, DEFAULT_CPU_CORES);
/*
 * Limit the cpu time of container c by writing "<quota> <period>" to the
 * cpu.max file of its v2 cgroup. The period is 1000000 and the quota is the
 * period times the number of cores. Zero cores means unlimited.
 */
751 static void apply_cpu_limit(const struct container *c)
754 unsigned cores = get_cpu_cores(c);
756 if (cores == 0) /* unlimited */
758 assert(cores != ~0U);
759 INFO_LOG("%u core%s\n", cores, cores == 1? "" : "s");
760 ctl = msg("/var/cgroup2/micoforia/%s/cpu.max", c->name);
/*
 * NOTE(review): the product is computed in unsigned arithmetic and wraps
 * around for large core counts (above 4294 with a 32 bit unsigned), while
 * check_options() passes 65536 as the upper bound to check_range() --
 * confirm whether such values can reach this point.
 */
761 str = msg("%u 1000000\n", 1000000 * cores);
762 write_cgroup(ctl, str);
767 static unsigned get_memory_limit(const struct container *c)
769 return c->memory_limit != ~0U? c->memory_limit :
770 OPT_UINT32_VAL(MICOFORIA, DEFAULT_MEMORY_LIMIT);
/*
 * Apply the memory limit of container c by writing the limit in bytes to
 * the memory.high file of its v2 cgroup. The configured number is in units
 * of 2^30 bytes. Zero means unlimited.
 */
773 static void apply_memory_limit(const struct container *c)
776 unsigned gigs = get_memory_limit(c);
778 if (gigs == 0) /* unlimited */
781 INFO_LOG("%uG\n", gigs);
782 ctl = msg("/var/cgroup2/micoforia/%s/memory.high", c->name);
/* The multiplication is carried out in unsigned long long. */
783 str = msg("%llu\n", 1024LLU * 1024LLU * 1024LLU * gigs);
784 write_cgroup(ctl, str);
/*
 * Determine the io.max specifications for container c. The list is returned
 * via *result, the number of entries is the return value. The per-container
 * list is checked first, then the global default list. Unlike for the
 * device access list, no built-in list is visible here.
 */
789 static unsigned get_iospecs(const struct container *c, char ***result)
791 if (c->num_io_max_entries > 0) {
793 return c->num_io_max_entries;
795 if (num_default_io_max_entries > 0) {
796 *result = default_io_max;
797 return num_default_io_max_entries;
/*
 * Write every io specification that applies to container c to the io.max
 * file of its v2 cgroup, one write_cgroup() call per entry. Nothing is
 * written if no specification applies.
 */
803 static void apply_io_limit(const struct container *c)
805 unsigned n, num_entries;
809 num_entries = get_iospecs(c, &iospec);
810 if (num_entries == 0)
812 INFO_LOG("%u entries\n", num_entries);
813 io_max = msg("/var/cgroup2/micoforia/%s/io.max", c->name);
814 for (n = 0; n < num_entries; n++)
815 write_cgroup(io_max, iospec[n]);
/*
 * Remove the cgroup directories of container c in both hierarchies: first
 * the one below /var/cgroup/micoforia, then the one below
 * /var/cgroup2/micoforia.
 */
819 static void cgroup_cleanup(const struct container *c)
821 char *dir = msg("/var/cgroup/micoforia/%s", c->name);
822 remove_subdirs_recursively(dir);
824 dir = msg("/var/cgroup2/micoforia/%s", c->name);
825 remove_subdirs_recursively(dir);
/*
 * Set up the host side of the container network. For every ifspec of c, a
 * veth device pair is created, the hardware address of the peer device is
 * set and the other device is attached to the bridge of the ifspec. A stale
 * device with the same name is removed first.
 */
829 static bool setup_network(const struct container *c)
/* NOTE(review): the log text reads "set establish"; one verb looks redundant. */
835 WARNING_LOG("could not set establish loopback link\n");
836 for (n = 0; n < c->num_ifspecs; n++) {
837 iface = interface_name(c, n, false);
838 peer = interface_name(c, n, true);
839 link_del(iface); /* ignore errors */
840 if (!create_veth_device_pair(iface, peer))
842 if (!set_hwaddr(peer, c->ifspec[n].hwaddr))
844 if (!attach_to_bridge(iface, c->ifspec[n].bridge))
/*
 * Configure the terminal referred to by fd: echoing, signal generating
 * characters and canonical (line based) input processing are turned off,
 * and the window size of stdin is copied to fd. Errors are logged but not
 * fatal.
 */
858 static void setup_termios(int fd)
860 struct winsize wsz; /* see ioctl_tty(2) */
865 if (tcgetattr(fd, &tios)) {
866 ERROR_LOG("tcgetattr: %m\n");
869 tios.c_lflag &= ~(ECHO | ISIG | ICANON);
871 tios.c_cc[VTIME] = 0;
872 if (tcsetattr(fd, TCSAFLUSH, &tios) < 0)
873 ERROR_LOG("tcsetattr: %m\n");
/* The window size is propagated only if it could be obtained from stdin. */
874 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &wsz) >= 0)
875 ioctl(fd, TIOCSWINSZ, &wsz);
878 struct device_node_info {
879 unsigned major, minor;
/*
 * Create the character device nodes listed in devices[] below the directory
 * cr->dev. A failing mknod() is fatal.
 */
884 static void create_standard_device_nodes(struct container_runtime *cr)
886 const struct device_node_info devices[] = {
887 {.major = 1, .minor = 3, .mode = 0666, .name = "null"},
888 {.major = 1, .minor = 5, .mode = 0666, .name = "zero"},
889 {.major = 1, .minor = 7, .mode = 0666, .name = "full"},
890 {.major = 1, .minor = 8, .mode = 0666, .name = "random"},
891 {.major = 1, .minor = 9, .mode = 0666, .name = "urandom"},
892 {.major = 4, .minor = 0, .mode = 0620, .name = "tty0"},
893 {.major = 5, .minor = 1, .mode = 0600, .name = "console"},
894 {.major = 5, .minor = 2, .mode = 0666, .name = "ptmx"},
898 for (n = 0; n < ARRAY_SIZE(devices); n++) {
899 const struct device_node_info *d = devices + n;
900 char *path = msg("%s/%s", cr->dev, d->name);
/*
 * The node is created without permission bits; the permissions are set by
 * the chmod() call below. NOTE(review): the return value of chmod() is not
 * checked.
 */
901 if (mknod(path, S_IFCHR, makedev(d->major, d->minor)) < 0)
902 die_errno("mknod %s", d->name);
903 chmod(path, d->mode);
/*
 * Populate the device directory of the container: mount a tmpfs at cr->dev,
 * create the standard device nodes, and for each forwarded tty create the
 * node tty<minor> and bind mount the corresponding pty slave on top of it.
 * The first pty slave is also bind mounted over the console node. All
 * failures are fatal.
 */
908 static void init_console(struct container_runtime *cr)
913 if (mount(NULL, cr->dev, "tmpfs", 0, "size=500000,mode=755") < 0)
914 die("mount tmpfs at %s: %m", cr->dev);
915 create_standard_device_nodes(cr);
916 for (n = 0; n < cr->num_ttys; n++) {
/* The node is named after the tty minor number, not after the index n. */
917 char *tty = msg("%s/tty%u", cr->dev, cr->tty[n]);
919 if (mknod(tty, S_IFCHR, makedev(4, cr->tty[n])) < 0)
920 die("mknod %s: %m", tty);
922 setup_termios(cr->slave[n]);
923 INFO_LOG("bind mounting %s -> %s\n", ttyname(cr->slave[n]), tty);
924 if (mount(ttyname(cr->slave[n]), tty, "none",
925 MS_BIND | MS_PRIVATE, NULL) < 0)
926 die("failed to bind mount %s: %m\n", tty);
929 console = msg("%s/console", cr->dev);
930 if (mount(ttyname(cr->slave[0]), console, "none",
931 MS_BIND | MS_PRIVATE, NULL) < 0)
932 die("failed to bind mount %s: %m\n", console);
937 * These umounts fail if the container shutdown already umounted the bind
938 * mounted devices. This is not fatal, so log only with low severity.
940 static void shutdown_console(struct container_runtime *cr)
945 for (n = 0; n < cr->num_ttys; n++) {
946 char *tty = msg("%s/tty%u", cr->dev, n);
947 if (umount2(tty, MNT_DETACH) < 0)
948 DEBUG_LOG("umount %s: %m\n", tty);
951 console = msg("%s/console", cr->dev);
952 if (umount2(console, MNT_DETACH) < 0)
953 DEBUG_LOG("umount %s: %m\n", console);
/*
 * Return the path of the unix socket of the named container, as a string
 * obtained from msg(). The path is the container name prefixed with
 * "micoforia/".
 */
static char *get_socket_path(const char *container_name)
{
	char *path = msg("micoforia/%s", container_name);

	return path;
}
962 /* Ignore everything the client sends us, but invalidate the fd on EOF. */
/*
 * client points to the slot which holds the file descriptor of an attached
 * client. The detach branch is taken for end of file as well as for read
 * errors, since both yield a return value <= 0.
 */
963 static void dispatch_client(int *client)
966 if (read(*client, buf, sizeof(buf)) <= 0) {
967 NOTICE_LOG("detaching client on fd %d\n", *client);
/*
 * Receive one request on the unix socket of the container and answer it.
 *
 * Requests from a uid other than our own are rejected. The request
 * "init_pid" is answered with the pid of the container init process.
 * "attach <minor>" and "force-attach <minor>" ask for the master side of the
 * pty that backs the given tty; on success the master fd is passed to the
 * client and the client fd is remembered in cr->client[]. Error replies
 * start with the byte 1 followed by an errno name.
 */
973 static void dispatch_socket_request(struct container_runtime *cr)
/* Zeroing the buffer and receiving one byte less keeps buf NUL-terminated. */
982 memset(buf, 0, sizeof(buf));
983 if (!recv_cred_buffer(cr->socket_fd, buf, sizeof(buf) - 1, &cfd, &uid))
985 if (uid != getuid()) {
986 const char msg[] = "\1EACCES";
987 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
988 NOTICE_LOG("access denied for uid %d\n", (int)uid);
991 if (strcmp(buf, "init_pid") == 0) {
/* The pid is sent in host representation right after the first byte of buf. */
993 memcpy(buf + 1, &cr->init_pid, sizeof(int));
994 send(cfd, buf, 1 + sizeof(int), MSG_DONTWAIT);
997 if (sscanf(buf, "attach %u", &minor) == 1) {
999 } else if (sscanf(buf, "force-attach %u", &minor) == 1) {
1002 const char msg[] = "\1EINVAL";
1003 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1004 NOTICE_LOG("invalid request: %s\n", buf);
/* Find the index of the requested minor among the forwarded ttys. */
1007 for (n = 0; n < cr->num_ttys; n++) {
1008 INFO_LOG("n: %u, tty[n]: %u\n", n, cr->tty[n]);
1009 if (cr->tty[n] == minor)
1012 if (n == cr->num_ttys) {
1013 const char msg[] = "\1ENOTTY";
1014 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1015 NOTICE_LOG("tty%u is not being forwarded\n", minor);
/*
 * Another client is attached to this tty already. Two outcomes are visible:
 * the old client is closed, or the request is refused with EBUSY.
 * Presumably force-attach selects the former -- TODO confirm.
 */
1018 if (cr->client[n] >= 0) {
1020 close(cr->client[n]);
1023 const char msg[] = "\1EBUSY";
1024 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1025 ERROR_LOG("tty%u is already in use\n", minor);
1029 if (!pass_fd(cr->master[n], cfd)) {
1030 ERROR_LOG("could not pass master fd\n");
1033 NOTICE_LOG("attached client on fd %d to tty%u\n", cfd, minor);
1034 cr->client[n] = cfd;
1040 /* discards read data if dst < 0 */
/*
 * Perform a single read() from src and a single write() of the result to
 * dst. Read errors, write errors and short writes are logged at debug level
 * only.
 */
1041 static bool copy(int src, int dst)
1046 sz1 = read(src, buf, sizeof(buf));
1050 DEBUG_LOG("read from fd %d: %m\n", src);
1056 sz2 = write(dst, buf, sz1);
1058 DEBUG_LOG("write to fd %d: %m\n", dst);
1062 DEBUG_LOG("short write to fd %d\n", dst);
1069 * The function returns only when the process receives SIGCHLD. In this case
1070 * the return value is 0 for success, 1 for failure, and 2 if the child's exit
1071 * code indicates a reboot request. Other signals are pushed down to the child
/*
 * Main loop of the parent process. pid is the child process to wait for.
 * Each iteration selects on the signal pipe, the request socket, and for
 * every forwarded tty either the attached client or the pty master. In
 * foreground mode stdin is watched as well.
 */
1074 static int parent_loop(pid_t pid, const struct container *c,
1075 struct container_runtime *cr)
1079 init_signal_handling();
1081 int sig, max_fileno = 0;
1085 if (OPT_GIVEN(START, FOREGROUND)) {
1086 FD_SET(STDIN_FILENO, &fds);
1087 if (STDIN_FILENO > max_fileno)
1088 max_fileno = STDIN_FILENO;
1090 FD_SET(signal_pipe[0], &fds);
1091 if (signal_pipe[0] > max_fileno)
1092 max_fileno = signal_pipe[0];
1093 FD_SET(cr->socket_fd, &fds);
1094 if (cr->socket_fd > max_fileno)
1095 max_fileno = cr->socket_fd;
1096 for (n = 0; n < cr->num_ttys; n++) {
1097 if (cr->client[n] >= 0) { /* a client is attached to this tty */
1098 FD_SET(cr->client[n], &fds);
1099 if (cr->client[n] > max_fileno)
1100 max_fileno = cr->client[n];
1102 FD_SET(cr->master[n], &fds);
1103 if (cr->master[n] > max_fileno)
1104 max_fileno = cr->master[n];
1107 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1109 ERROR_LOG("select: %m\n");
1113 if (!FD_ISSET(signal_pipe[0], &fds))
1115 sig = next_signal();
1116 if (sig == SIGCHLD) {
1118 if (waitpid(pid, &wstatus, WNOHANG) < 0) {
1119 WARNING_LOG("wait: %m\n");
/* Map the exit status of the child to the documented return values. */
1123 if (!WIFEXITED(wstatus))
1125 if (WEXITSTATUS(wstatus) == 2)
1127 return WEXITSTATUS(wstatus) != EXIT_SUCCESS;
1131 if (FD_ISSET(cr->socket_fd, &fds))
1132 dispatch_socket_request(cr);
1133 for (n = 0; n < cr->num_ttys; n++) {
1134 if (cr->client[n] >= 0) {
/*
 * NOTE(review): this "if" has no parentheses of its own; it only compiles
 * when FD_ISSET() expands to a parenthesized expression.
 */
1135 if FD_ISSET(cr->client[n], &fds)
1136 dispatch_client(cr->client + n);
1137 } else { /* stdout is /dev/null in background mode */
/* Only the output of the first tty goes to stdout, the rest is discarded. */
1138 if (FD_ISSET(cr->master[n], &fds))
1139 copy(cr->master[n], n == 0?
1140 STDOUT_FILENO : -1);
1143 if (OPT_GIVEN(START, FOREGROUND)) {
/* Keyboard input is forwarded to the first tty. */
1144 if (FD_ISSET(STDIN_FILENO, &fds))
1145 copy(STDIN_FILENO, cr->master[0]);
1150 /* Set net namespace of child and call parent_loop(). */
/*
 * Startup handshake as seen from the parent: read the pid of the container
 * init process from pipe1, call set_netns() for the peer device of every
 * ifspec, then write a single byte to pipe2 to tell the other side to go
 * on. Each failure is logged; the pipe ends in use are closed on all
 * visible paths.
 */
1151 static int run_parent(pid_t child_pid, const struct container *c,
1152 struct container_runtime *cr)
/* The parent only reads from pipe1 and only writes to pipe2. */
1157 close(cr->pipe1[1]);
1158 close(cr->pipe2[0]);
/* NOTE(review): the size 4 is hard-coded; init_pid is declared as int. */
1159 if (read(cr->pipe1[0], &cr->init_pid, 4) != 4) {
1160 ERROR_LOG("pipe1 read error\n");
1161 close(cr->pipe1[0]);
1162 close(cr->pipe2[1]);
1165 INFO_LOG("received grand child pid: %u\n", (unsigned)cr->init_pid);
1166 close(cr->pipe1[0]);
1167 for (n = 0; n < c->num_ifspecs; n++) {
1168 char *peer = interface_name(c, n, true);
1169 success = set_netns(peer, child_pid);
1172 ERROR_LOG("set_netns error\n");
1173 close(cr->pipe2[1]);
1177 success = write(cr->pipe2[1], "\0", 1) == 1;
1178 close(cr->pipe2[1]);
1180 ERROR_LOG("pipe2 write error\n");
1183 return parent_loop(child_pid, c, cr);
/*
 * Determine the capabilities to drop for container c. The array is returned
 * via *result, the number of entries is the return value. Three sources are
 * visible: the per-container list, the default list, and the built-in list.
 *
 * NOTE(review): the default list is selected by OPT_GIVEN() here, whereas
 * the sibling getters test the corresponding num_default_* counter.
 */
1186 static unsigned get_capdrops(const struct container *c, cap_value_t **result)
1188 static cap_value_t builtin_capdrop[] = {CAP_SYS_MODULE, CAP_SYS_TIME,
1192 *result = c->capdrop;
1193 return c->num_capdrops;
1195 if (OPT_GIVEN(MICOFORIA, DEFAULT_CAPDROP)) {
1196 *result = default_capdrop;
1197 return num_default_capdrops;
1199 *result = builtin_capdrop;
1200 return ARRAY_SIZE(builtin_capdrop);
/*
 * Remove every capability returned by get_capdrops() from the capability
 * bounding set of the calling process. A failing cap_drop_bound() is fatal.
 */
1203 static void drop_caps(const struct container *c)
1205 cap_value_t *capdrop;
1206 unsigned n, num_capdrops;
1208 INFO_LOG("lowering bounding set capabilities\n");
1209 num_capdrops = get_capdrops(c, &capdrop);
1210 for (n = 0; n < num_capdrops; n++) {
/* The name is looked up for the log message only. */
1211 char *name = cap_to_name(capdrop[n]);
1212 DEBUG_LOG("dropping %s\n", name);
1214 if (cap_drop_bound(capdrop[n]) < 0)
1215 die_errno("cap_drop_bound");
/*
 * Main loop of the child process. pid is the container init process. The
 * loop waits on the signal pipe only. On SIGCHLD the console is shut down
 * and the termination status of init is examined; other signals result in a
 * signal being sent to init. Declared noreturn.
 */
1219 __attribute ((noreturn))
1220 static void child_loop(pid_t pid, struct container_runtime *cr)
1224 INFO_LOG("parent: %u, child: %u, init: %u\n", (unsigned) getppid(),
1225 (unsigned)getpid(), (unsigned)pid);
1226 init_signal_handling();
1234 FD_SET(signal_pipe[0], &fds);
1235 if (signal_pipe[0] > max_fileno)
1236 max_fileno = signal_pipe[0];
1237 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1239 ERROR_LOG("select: %m\n");
1242 do { if (FD_ISSET(signal_pipe[0], &fds)) {
1243 int sig = next_signal();
1244 if (sig == SIGCHLD) {
1245 if (waitpid(pid, &wstatus, WNOHANG) < 0) {
1246 WARNING_LOG("wait: %m\n");
1249 shutdown_console(cr);
/*
 * Termination of init by signal number 1 is treated as a reboot request.
 * NOTE(review): the number is hard-coded instead of using a SIG* name.
 */
1250 if (WIFSIGNALED(wstatus) &&
1251 WTERMSIG(wstatus) == 1) {
1252 NOTICE_LOG("reboot requested\n");
1255 NOTICE_LOG("container terminated\n");
1258 NOTICE_LOG("sending signal %d to container init\n",
/* SIGINT is passed on as is, every other signal becomes SIGKILL. */
1260 kill(pid, sig == SIGINT? SIGINT : SIGKILL);
1265 static const char *get_init_path(const struct container *c)
1267 return c->init? c->init : OPT_STRING_VAL(MICOFORIA, DEFAULT_INIT);
1271 * The child process unshares namespaces, spawns the init process which runs
1272 * the pre-exec hook and executes the container init process. This function
1273 * never returns, but both the child and the init process exit when the
1274 * container terminates. The exit code of the child tells the parent whether
1275 * it should restart the container.
1277 __attribute ((noreturn))
1278 static void run_child(const struct container *c, struct container_runtime *cr)
1281 char *init, *put_old;
/* Close the descriptors which only the parent uses. */
1285 close(cr->socket_fd);
1286 for (n = 0; n < cr->num_ttys; n++)
1287 close(cr->master[n]);
1288 close(cr->pipe1[0]);
1289 close(cr->pipe2[1]);
/* NOTE(review): these two messages end in a newline, unlike the other die_errno() calls. */
1290 if (unshare(CLONE_NEWNET) < 0)
1291 die_errno("unshare net ns\n");
1292 if (unshare(CLONE_NEWPID) < 0)
1293 die_errno("unshare pid ns\n");
1294 /* fork again to become pid 1 in the new pid namespace */
1295 if ((pid = fork()) < 0)
1298 * By writing to pipe1 we tell the parent (a) we've unshared the net
1299 * namespace, and (b) the pid of the init process in the parent
1303 close(cr->pipe2[0]);
/* The size 4 matches the read() in run_parent(). */
1304 if (write(cr->pipe1[1], (const char *)&pid, 4) != 4)
1305 die_errno("pipe write error");
1306 close(cr->pipe1[1]);
1307 child_loop(pid, cr); /* never returns */
1310 DEBUG_LOG("now running as pid %d\n", pid);
/* Block until the parent has written its go-ahead byte to pipe2. */
1311 if (read(cr->pipe2[0], &ch, 1) != 1)
1312 die_errno("pipe read error");
1313 close(cr->pipe1[1]);
1314 close(cr->pipe2[0]);
1315 if (unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS) < 0)
1316 die_errno("unshare");
/* The result of mkdir() is ignored; the directory may exist already. */
1317 mkdir(cr->dev, 0777);
1319 for (n = 0; n < cr->num_ttys; n++)
1320 close(cr->slave[n]);
1321 INFO_LOG("setting hostname to %s\n", c->name);
1322 if (sethostname(c->name, strlen(c->name)) < 0)
1323 die_errno("sethostname error");
1324 if (chdir(cr->root) < 0)
1325 die_errno("chdir %s", cr->root);
1329 apply_memory_limit(c);
/* Inside the container the interfaces are called eth0, eth1, ... */
1331 for (n = 0; n < c->num_ifspecs; n++) {
1332 char *peer = interface_name(c, n, true);
1333 char *renamed = msg("eth%u", n);
1334 if (!rename_interface(peer, renamed))
1335 die("can not rename %s to %s\n", peer, renamed);
1339 run_pre_exec_hook(c);
1340 setup_termios(STDIN_FILENO);
/* The old root ends up at mnt below the new root and is detached right away. */
1341 put_old = msg("%s/mnt", cr->root);
1342 /* glibc does not provide a wrapper for pivot_root */
1343 if (syscall(SYS_pivot_root, ".", put_old) < 0)
1344 die_errno("pivot_root (put_old: %s)", put_old);
1345 if (umount2("/mnt", MNT_DETACH) < 0)
1346 die_errno("umount %s", put_old);
1348 close(STDIN_FILENO);
1349 init = xstrdup(get_init_path(c));
1350 INFO_LOG("handing over control to container init: %s\n", init);
/* init is executed with a NULL environment pointer. */
1351 execve(init, (char *[]){init, NULL}, NULL);
/*
 * NOTE(review): the program that failed to execute is init. c->init is NULL
 * whenever get_init_path() fell back to the default, so this message should
 * most likely print init instead.
 */
1352 die_errno("failed to exec init process %s", c->init);
1356 * We need three processes, called parent, child, init, because we want one
1357 * process run with namespaces unmodified, requiring one fork. After the child
1358 * has unshared its PID namespace, it keeps its old PID, so we need to fork
1359 * again to get pid 1. The child can not terminate because the parent can not
1360 * wait(2) on its grandchild.
/*
 * Summary of the visible steps: create the cgroup, listen on the container's
 * unix socket, fill in the runtime paths (root, dev, pts), allocate and open
 * one pty pair per container tty, remount the root directory, set up the
 * network, run the pre-start hook, create the two startup pipes and fork.
 * run_child() and run_parent() are then called on the two sides of the fork.
 */
1362 static bool exec_container(const struct container *c)
1367 struct container_runtime cr = {0};
1371 create_cgroup_v2(c);
1372 socket_path = get_socket_path(c->name);
1373 success = listen_on_unix_socket(socket_path, &cr.socket_fd);
1375 ERROR_LOG("can not listen on unix socket %s\n", socket_path);
1379 cr.root = get_root_dir(c);
1380 cr.dev = msg("%s/dev", cr.root);
1381 cr.pts = realpath("/proc/self/fd/0", NULL); /* resolve what stdin refers to */
1382 DEBUG_LOG("pts: %s\n", cr.pts); /* NOTE(review): realpath() returns NULL on failure; confirm this is handled */
1383 cr.num_ttys = get_container_ttys(c, &cr.tty);
1384 cr.master = xmalloc(cr.num_ttys * sizeof(int));
1385 cr.slave = xmalloc(cr.num_ttys * sizeof(int));
1386 cr.client = xmalloc(cr.num_ttys * sizeof(int));
1387 for (n = 0; n < cr.num_ttys; n++)
1390 NOTICE_LOG("starting %s\n", c->name);
1391 for (n = 0; n < cr.num_ttys; n++) {
1392 if (openpty(cr.master + n, cr.slave + n, NULL, NULL, NULL) < 0)
1394 DEBUG_LOG("pty (tty%u <-> %s)\n", n, ttyname(cr.slave[n]));
1396 /* mount rw, ignore errors */
1397 mount(NULL, cr.root, NULL, MS_REMOUNT, NULL);
1398 if (!setup_network(c))
1400 if (!run_pre_start_hook(c))
1402 if (pipe(cr.pipe1) < 0) /* child -> parent */
1404 if (pipe(cr.pipe2) < 0)
1405 die_errno("pipe2"); /* parent -> child */
1406 if ((pid = fork()) < 0)
1409 run_child(c, &cr); /* never returns */
1410 ret = run_parent(pid, c, &cr);
1413 NOTICE_LOG("rebooting\n");
1414 for (n = 0; n < cr.num_ttys; n++) {
1415 close(cr.master[n]);
/*
 * Build the log file path of a container: "<logdir>/<name>", where <logdir>
 * is the value of the LOGDIR option. The string comes from msg(); presumably
 * the caller owns it (TODO confirm against msg()).
 */
1421 static char *get_container_logfile(const char *name)
1423 return msg("%s/%s", OPT_STRING_VAL(MICOFORIA, LOGDIR), name);
/*
 * Start a single container. Refuses to start if the container is already
 * locked. In foreground mode both stdin and stdout must be terminals and the
 * terminal attributes of stdin are saved. After the fork, the visible code
 * takes the lock, runs exec_container() and exits with a status that
 * reflects its result, restoring the saved terminal attributes first when in
 * foreground mode.
 */
1426 static bool start_container(const struct container *c)
1430 struct termios tios;
1433 if (is_locked(c->name, &pid)) {
1434 ERROR_LOG("%s is locked by pid %u\n", c->name, (unsigned)pid);
1437 if (OPT_GIVEN(START, FOREGROUND)) {
1438 if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) {
1439 ERROR_LOG("both stdin and stdout must be terminals\n");
1442 if (tcgetattr(STDIN_FILENO, &tios) < 0) { /* saved for the tcsetattr() below */
1443 ERROR_LOG("tcgetattr: %m\n");
1447 if ((pid = fork()) < 0)
1451 logfile = get_container_logfile(c->name);
1455 if (!try_lock(c->name, &pid)) /* the is_locked() check above does not take the lock */
1456 die("%s is locked by pid %u", c->name, (unsigned)pid);
1457 success = exec_container(c);
1458 if (OPT_GIVEN(START, FOREGROUND)) {
1459 if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios) < 0)
1460 ERROR_LOG("tcsetattr: %m\n");
1462 exit(success? EXIT_SUCCESS : EXIT_FAILURE);
/*
 * Validate the non-option arguments of the subcommand before any container
 * is started. Without arguments, at least one container must be configured,
 * and foreground mode requires that there is only one. With arguments,
 * foreground mode accepts only a single name, and each name is looked up
 * with get_container(). Violations are reported through die().
 */
1465 static void check_container_args(void)
1467 unsigned n, num_inputs;
1468 struct container *c;
1470 num_inputs = lls_num_inputs(sublpr);
1471 if (num_inputs == 0) {
1472 if (num_containers == 0)
1473 die("no container configured\n");
1474 if (OPT_GIVEN(START, FOREGROUND) && num_containers > 1)
1475 die("must specify container for foreground mode");
1477 if (OPT_GIVEN(START, FOREGROUND) && num_inputs > 1)
1478 die("can start only one container in foreground mode");
1479 for (n = 0; n < num_inputs; n++) {
1480 const char *name = lls_input(n, sublpr);
1481 c = get_container(name);
1483 die("container not configured: %s", name);
/* Iteration state for cai_next(); the idx member is the current position. */
1488 struct container_arg_iter {
/* Initializer which starts the iteration at index zero. The argument is unused. */
1492 #define INITIALIZED_CAI(_cai) {.idx = 0}
/*
 * Return the next container of the iteration. If the subcommand has no
 * non-option arguments, the global container array is walked by index.
 * Otherwise the given names are looked up one by one and names without a
 * matching configuration are reported with an error message.
 *
 * cai: iteration state, advanced by this call.
 * skipped: presumably set when an unconfigured name was passed over (TODO
 * confirm).
 */
1494 static struct container *cai_next(struct container_arg_iter *cai, bool *skipped)
1496 unsigned num_inputs = lls_num_inputs(sublpr);
1500 if (num_inputs == 0) {
1501 if (cai->idx >= num_containers)
1503 return container[cai->idx++];
1505 for (; cai->idx < num_inputs; cai->idx++) {
1506 const char *name = lls_input(cai->idx, sublpr);
1507 struct container *c = get_container(name);
1509 ERROR_LOG("%s: not configured\n", name);
/*
 * Call f on every container delivered by cai_next(). The visible condition
 * treats both a false return of f and a skipped argument as a failure case.
 *
 * f: callback invoked once per container.
 */
1520 static bool for_each_container_arg(bool (*f)(const struct container *c))
1522 struct container *c;
1523 bool success = true;
1525 struct container_arg_iter cai = INITIALIZED_CAI(cai);
1527 while ((c = cai_next(&cai, &skipped)))
1528 if (!f(c) || skipped)
/*
 * Handler of the start subcommand: validates the container arguments, makes
 * sure the log directory exists and calls start_container() for each
 * selected container.
 *
 * An empty log directory argument is reported through die_empty_arg(), a
 * failure to create the directory through die_errno(). Returns the result
 * of for_each_container_arg().
 */
1533 static bool com_start(void)
1535 const char *logdir = OPT_STRING_VAL(MICOFORIA, LOGDIR);
1537 check_container_args();
1538 if (logdir[0] == '\0')
1539 die_empty_arg("logdir"); /* name of the option whose argument is empty */
1541 if (mkdir(logdir, 0777) < 0 && errno != EEXIST) /* an already existing directory is fine */
1542 die_errno("mkdir %s", logdir);
1543 return for_each_container_arg(start_container);
1545 EXPORT_CMD_HANDLER(start);
/*
 * Send signum to the process which holds the lock of container c. The pid is
 * obtained from is_locked(); if the container is not locked it is reported
 * as not running. A failing kill(2) is logged as an error.
 */
1547 static bool send_signal_to_container(int signum, const struct container *c)
1552 if (!is_locked(c->name, &pid)) {
1553 INFO_LOG("%s is not running\n", c->name);
1556 DEBUG_LOG("sending signal %d to pid %u\n", signum, (unsigned)pid);
1557 success = kill(pid, signum) >= 0;
1559 ERROR_LOG("kill %s: %m\n", c->name);
/*
 * Set up a fixed environment: TERM keeps the value it had on entry, while
 * PATH, USER, LOGNAME and HOME get root's defaults. All setenv() calls pass
 * overwrite == 0, so a variable which is still set at that point is left
 * alone.
 */
1563 static void clean_env(void)
1565 char *term = getenv("TERM");
1569 setenv("TERM", term, 0);
1570 setenv("PATH", "/root/bin:/usr/local/sbin:/usr/local/bin"
1571 ":/sbin:/usr/sbin:/bin:/usr/bin", 0);
1572 setenv("USER", "root", 0);
1573 setenv("LOGNAME", "root", 0);
1574 setenv("HOME", "/root", 0);
/*
 * Ask the container's unix socket for the pid of its init process by sending
 * the "init_pid" request. The answer is stored in *result. A failure is
 * logged as an error.
 */
1577 static bool request_init_pid(const char *name, int *result)
1579 char *socket_path = get_socket_path(name);
1583 success = request_int(socket_path, "init_pid", result);
1586 ERROR_LOG("could not determine init pid of %s\n", name);
/*
 * Shut down a running container by executing "halt" inside it:
 * nsenter -w -a -r -t <init pid> halt. The init pid is requested from the
 * container's socket and formatted into str, which is the argument of -t.
 * A container which is not locked is handled up front; the error message is
 * tied to whether container names were given on the command line.
 */
1590 static bool shutdown_container(const struct container *c)
1594 char *argv[] = {"nsenter", "-w", "-a", "-r", "-t", str, "halt", NULL};
1596 if (!is_locked(c->name, NULL)) {
1597 if (lls_num_inputs(sublpr) == 0)
1599 ERROR_LOG("container not running: %s\n", c->name);
1607 if (!request_init_pid(c->name, &pid))
1608 _exit(EXIT_FAILURE);
1609 sprintf(str, "%d", pid);
1611 execvp(argv[0], argv);
1612 _exit(EXIT_FAILURE); /* only reached if execvp() failed */
/* A container counts as dead when its lock is not held. */
1615 static bool container_is_dead(const struct container *c)
1617 return !is_locked(c->name, NULL);
/*
 * Poll until all selected containers are dead. Each iteration sleeps for ms
 * milliseconds and then checks every container with container_is_dead().
 * The loop is left once ms reaches 20000.
 */
1620 static bool wait_for_containers_to_die(void)
1626 while (ms < 20000) {
1627 ts.tv_sec = ms / 1000;
1628 ts.tv_nsec = (ms % 1000) * 1000 * 1000; /* remainder, converted from ms to ns */
1629 if (nanosleep(&ts, NULL) < 0)
1631 success = for_each_container_arg(container_is_dead);
/*
 * Handler of the stop subcommand: calls shutdown_container() for each
 * selected container and ends in wait_for_containers_to_die(); the WAIT
 * option is tested before that call.
 */
1639 static bool com_stop(void)
1641 bool success = for_each_container_arg(shutdown_container);
1645 if (!OPT_GIVEN(STOP, WAIT))
1647 return wait_for_containers_to_die();
1649 EXPORT_CMD_HANDLER(stop);
/* Request a reboot of container c by sending SIGINT to its lock holder. */
1651 static bool reboot_container(const struct container *c)
1653 return send_signal_to_container(SIGINT, c);
/* Handler of the reboot subcommand: reboots each selected container. */
1656 static bool com_reboot(void)
1658 return for_each_container_arg(reboot_container);
1660 EXPORT_CMD_HANDLER(reboot);
/* Kill container c by sending SIGUSR1 to its lock holder. */
1662 static bool kill_container(const struct container *c)
1664 return send_signal_to_container(SIGUSR1, c);
/*
 * Handler of the kill subcommand: calls kill_container() for each selected
 * container and ends in wait_for_containers_to_die(); the WAIT option is
 * tested before that call.
 */
1667 static bool com_kill(void)
1669 bool success = for_each_container_arg(kill_container);
1673 if (!OPT_GIVEN(KILL, WAIT))
1675 return wait_for_containers_to_die();
1677 EXPORT_CMD_HANDLER(kill);
/*
 * Print the effective configuration of container c to stdout, one item per
 * line: hooks, root dir, init path, network interfaces with their hardware
 * addresses, device access entries, I/O specs, CPU core limit, memory limit,
 * dropped capabilities and ttys. The values are obtained through the get_*()
 * accessors rather than read from *c directly.
 */
1679 static void list_container_verbose(const struct container *c)
1684 cap_value_t *capdrop;
1686 char cores_str[25] = "unlimited";
1687 unsigned cores = get_cpu_cores(c);
1689 printf("%s:\n", c->name);
1690 printf("\tpre-start hook: %s\n", get_pre_start_hook(c));
1691 printf("\tpre-exec hook: %s\n", get_pre_exec_hook(c));
1692 root = get_root_dir(c);
1693 printf("\troot dir: %s\n", root);
1695 printf("\tinit path: %s\n", get_init_path(c));
1696 for (n = 0; n < c->num_ifspecs; n++) {
1697 char pretty_hwaddr[18];
1698 char *iface = interface_name(c, n, false);
1699 pretty_print_hwaddr(c->ifspec[n].hwaddr, pretty_hwaddr);
1700 printf("\tinterface #%u: %s (%s)\n", n, iface, pretty_hwaddr);
1703 N = get_dacl(c, &word_list);
1704 for (n = 0; n < N; n++)
1705 printf("\tdac entry #%u: %s %s\n", n, word_list[n][0] == 'a'?
1706 "allow" : "deny", word_list[n] + 1);
1707 N = get_iospecs(c, &word_list);
1708 for (n = 0; n < N; n++)
1709 printf("\tiospec #%u: %s\n", n, word_list[n]);
1711 sprintf(cores_str, "%u", cores); /* replaces the "unlimited" default text */
1712 printf("\tCPU core limit: %s\n", cores_str);
1713 printf("\tmemory limit: %uG\n", get_memory_limit(c));
1714 N = get_capdrops(c, &capdrop);
1715 for (n = 0; n < N; n++)
1716 printf("\tcapdrop #%u: %s\n", n, cap_to_name(capdrop[n]));
1717 N = get_container_ttys(c, &tty);
1718 for (n = 0; n < N; n++)
1719 printf("\ttty #%u: %u\n", n, tty[n]);
/*
 * Handler of the ls subcommand. Iterates over the selected containers with
 * cai_next() and prints them in one of three formats: verbose (see
 * list_container_verbose()), long (pid, CPU cores, memory limit and name,
 * tab separated) or just the name. The ALL option is consulted for
 * containers which are not locked, and QUIET suppresses the plain name.
 */
1722 static bool com_ls(void)
1724 struct container *c;
1725 bool skipped, success = true;
1726 struct container_arg_iter cai = INITIALIZED_CAI(cai);
1728 while ((c = cai_next(&cai, &skipped))) {
1732 if (!is_locked(c->name, &pid)) {
1733 if (!OPT_GIVEN(LS, ALL)) {
1739 if (OPT_GIVEN(LS, VERBOSE)) {
1740 list_container_verbose(c);
1743 if (OPT_GIVEN(LS, LONG)) {
1745 printf("%u\t", (unsigned)pid);
1748 printf("%u\t", get_cpu_cores(c));
1749 printf("%uG\t", get_memory_limit(c));
1750 printf("%s\n", c->name);
1753 if (!OPT_GIVEN(LS, QUIET))
1754 printf("%s\n", c->name);
1756 if (skipped) /* needed if the last given container arg is invalid */
1760 EXPORT_CMD_HANDLER(ls);
/*
 * Show the process tree of a running container by executing
 * "pstree -anp <pid>". The pid starts out as the one reported by
 * is_locked(). Unless the ALL option of the ps subcommand is given, it is
 * replaced by the pid of the container's init process.
 */
1762 static bool list_container_processes(const struct container *c)
1766 char *argv[] = {"pstree", "-anp", str, NULL};
1769 success = is_locked(c->name, &pid);
1771 if (lls_num_inputs(sublpr) == 0)
1773 ERROR_LOG("container \"%s\" is not running\n", c->name);
1776 if (!OPT_GIVEN(PS, ALL) && !request_init_pid(c->name, &pid))
1778 sprintf(str, "%d", pid);
1779 success = xexec(argv, NULL);
/* Handler of the ps subcommand: lists the processes of each selected container. */
1783 static bool com_ps(void)
1785 return for_each_container_arg(list_container_processes);
1787 EXPORT_CMD_HANDLER(ps);
/*
 * Handler of the attach subcommand. Requires exactly one argument, the name
 * of a running container, and terminals on stdin and stdout. It sends an
 * "attach <minor>" or, with the FORCE option, "force-attach <minor>" request
 * to the container's socket and gets the master descriptor of the tty back
 * through request_fd(). It then uses select(2) to watch stdin, the master
 * and the socket: bytes from stdin are written to the master one at a time,
 * and output of the master is copied to stdout. The terminal attributes of
 * stdin saved at the start are restored at the end.
 */
1789 static bool com_attach(void)
1795 int master, ret, socket_fd;
1796 bool have_escape = false;
1797 struct termios tios;
1798 uint32_t minor = OPT_UINT32_VAL(ATTACH, TTY);
1801 if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) {
1802 ERROR_LOG("both stdin and stdout must be terminals\n");
1805 if (tcgetattr(STDIN_FILENO, &tios) < 0)
1806 die_errno("tcgetattr");
1807 ret = lls_check_arg_count(sublpr, 1, 1, &errctx);
1809 die_lopsub(ret, &errctx);
1810 arg = lls_input(0, sublpr);
1811 if (!is_locked(arg, &pid)) {
1812 ERROR_LOG("container not running: %s\n", arg);
1815 socket_path = get_socket_path(arg);
1816 if (OPT_GIVEN(ATTACH, FORCE))
1817 rq = msg("force-attach %u", minor);
1819 rq = msg("attach %u", minor);
1820 socket_fd = request_fd(socket_path, rq, &master);
1823 INFO_LOG("Attached to /dev/tty%u of container %s\n", minor, arg);
1824 NOTICE_LOG("Type CTRL+a q to quit\n");
1825 setup_termios(STDIN_FILENO);
1826 setup_termios(master);
1831 FD_SET(STDIN_FILENO, &fds);
1832 if (STDIN_FILENO > max_fileno)
1833 max_fileno = STDIN_FILENO;
1834 FD_SET(master, &fds);
1835 if (master > max_fileno)
1836 max_fileno = master;
1837 FD_SET(socket_fd, &fds);
1838 if (socket_fd > max_fileno)
1839 max_fileno = socket_fd;
1840 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1842 ERROR_LOG("select: %m\n");
1845 if (FD_ISSET(socket_fd, &fds))
1847 if (FD_ISSET(STDIN_FILENO, &fds)) {
1849 if (read(STDIN_FILENO, &c, 1) <= 0)
1851 if (c == 1 && !have_escape) /* byte value 1 is CTRL+a, the escape key */
1853 else if (c == 'q' && have_escape) /* CTRL+a q: the quit sequence */
1855 else if (write(master, &c, 1) != 1)
1858 if (FD_ISSET(master, &fds)) {
1859 if (!copy(master, STDOUT_FILENO))
1863 if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios) < 0)
1864 ERROR_LOG("tcsetattr: %m\n");
1868 EXPORT_CMD_HANDLER(attach);
/*
 * Handler of the help subcommand. Accepts at most one argument. Without an
 * argument it shows the summary of all subcommands. Otherwise the argument
 * is looked up as a subcommand name and its short help is printed, or its
 * long help if the LONG option is given.
 */
1870 static bool com_help(void)
1873 char *errctx, *help;
1875 const struct lls_command *cmd;
1877 ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
1879 die_lopsub(ret, &errctx);
1880 if (lls_num_inputs(sublpr) == 0) {
1881 show_subcommand_summary(OPT_GIVEN(HELP, LONG));
1884 arg = lls_input(0, sublpr);
1885 ret = lls_lookup_subcmd(arg, micoforia_suite, &errctx);
1887 die_lopsub(ret, &errctx);
1888 cmd = lls_cmd(ret, micoforia_suite);
1889 if (OPT_GIVEN(HELP, LONG))
1890 help = lls_long_help(cmd);
1892 help = lls_short_help(cmd);
1893 printf("%s\n", help);
1897 EXPORT_CMD_HANDLER(help);
/*
 * Handler of the configtest subcommand. It only prints a confirmation;
 * presumably the configuration was already parsed before the handler runs
 * (TODO confirm against parse_options()).
 */
1899 static bool com_configtest(void)
1901 printf("Syntax Ok\n");
1904 EXPORT_CMD_HANDLER(configtest);
/*
 * Handler of the edit subcommand: opens the configuration file in the
 * program named by $EDITOR, or in vi if EDITOR is not set.
 */
1906 static bool com_edit(void)
1908 char *ed = getenv("EDITOR"); /* must not be freed */
1909 char *conf = get_config_file_path();
1910 char *argv[] = {ed? ed : "vi", conf, NULL};
1911 bool success = xexec(argv, NULL);
1916 EXPORT_CMD_HANDLER(edit);
/*
 * Handler of the enter subcommand: runs a command inside a running container
 * via "nsenter -w -a -r -t <init pid> <command...>". The first argument is
 * the container name and is required. Further arguments, if any, form the
 * command. Without them the default command "login -f root" is used.
 */
1918 static bool com_enter(void)
1922 char *nsenter_args[] = {"nsenter", "-w", "-a", "-r", "-t"};
1923 const unsigned nna = ARRAY_SIZE(nsenter_args); /* num nsenter args */
1924 char *dflt_cmd[] = {"login", "-f", "root"};
1925 unsigned n, N, ni = lls_num_inputs(sublpr);
1926 unsigned nea = ni > 1? ni - 1 : ARRAY_SIZE(dflt_cmd); /* num extra args */
1932 ret = lls_check_arg_count(sublpr, 1, INT_MAX, &errctx);
1934 die_lopsub(ret, &errctx);
1935 arg = lls_input(0, sublpr);
1936 if (!is_locked(arg, &pid)) {
1937 ERROR_LOG("container not running: %s\n", arg);
1940 if (!request_init_pid(arg, &pid))
1942 N = nna + nea + 2; /* +1 for arg to -t and +1 for terminating NULL */
1943 argv = xmalloc(N * sizeof(char *));
1944 for (n = 0; n < nna; n++)
1945 argv[n] = nsenter_args[n];
1946 sprintf(str, "%d", pid);
1948 for (n = 0; n < nea; n++)
1949 argv[nna + 1 + n] = ni > 1? (char *)lls_input(n + 1, sublpr)
1953 success = xexec(argv, NULL);
1957 EXPORT_CMD_HANDLER(enter);
/*
 * Handler of the log subcommand: shows the log file of the container named
 * by the single required argument. The viewer is "less" when both stdin and
 * stdout are terminals and "cat" otherwise.
 */
1959 static bool com_log(void)
1962 char *errctx, *logfile;
1963 bool success, use_less = isatty(STDIN_FILENO) && isatty(STDOUT_FILENO);
1964 char *argv[] = {use_less? "less" : "cat", NULL /* filename */, NULL};
1966 ret = lls_check_arg_count(sublpr, 1, 1, &errctx);
1968 die_lopsub(ret, &errctx);
1969 logfile = get_container_logfile(lls_input(0, sublpr));
1971 success = xexec(argv, NULL);
1975 EXPORT_CMD_HANDLER(log);
/*
 * Program entry point. Parses the options of the micoforia supercommand and
 * sets the log level. The first non-option argument is looked up as the
 * subcommand name, the remaining arguments are parsed with that subcommand,
 * and its handler is run. The exit status reflects the handler's return
 * value.
 */
1977 int main(int argc, char *argv[])
1981 const struct micoforia_user_data *ud;
1982 unsigned num_inputs;
1985 parse_options(argc, argv, CMD_PTR(MICOFORIA), &lpr);
1986 loglevel_arg_val = OPT_UINT32_VAL(MICOFORIA, LOGLEVEL);
1988 num_inputs = lls_num_inputs(lpr);
1989 ret = lls_lookup_subcmd(argv[argc - num_inputs], micoforia_suite, &errctx); /* the first non-option argument names the subcommand */
1991 die_lopsub(ret, &errctx);
1992 subcmd = lls_cmd(ret, micoforia_suite);
1993 parse_options(num_inputs, argv + argc - num_inputs, subcmd, &sublpr);
1994 ud = lls_user_data(subcmd);
1995 exit(ud->handler()? EXIT_SUCCESS : EXIT_FAILURE);