Initial commit.
[micoforia.git] / micoforia.c
1 /* SPDX-License-Identifier: GPL-2.0-only */
2
3 #include "m7a.h"
4
5 #include <lopsub.h>
6 #include <sys/mman.h>
7 #include <sched.h>
8 #include <sys/ioctl.h>
9 #include <sys/mount.h>
10 #include <sys/sysmacros.h>
11 #include <pty.h>
12 #include <utmp.h>
13 #include <sys/socket.h>
14 #include <sys/capability.h>
15 #include <sys/syscall.h>
16
17 #include "micoforia.lsg.h"
18
/*
 * Parse result of the top-level command (consulted by lls_num_inputs() in
 * handle_version_and_help()).
 */
static struct lls_parse_result *lpr;
/* NOTE(review): presumably the parse result of the subcommand -- confirm. */
static struct lls_parse_result *sublpr;
/* m7a_log() drops every message whose level is below this value. */
unsigned loglevel_arg_val = 4;
21
/* Specification of one network interface of a container. */
struct ifspec {
	/* name of the bridge device the interface gets attached to */
	char *bridge;
	/* hardware address, all zeros for the default interface */
	uint8_t hwaddr[6];
};
26
27 struct container {
28 char *name;
29 char *pre_start_hook;
30 char *pre_exec_hook;
31 char *root_dir;
32 char *init;
33 struct ifspec *ifspec;
34 /* this is never zero, even if no ifspec was given */
35 unsigned num_ifspecs;
36 char **dacl;
37 unsigned num_dac_entries;
38 char **io_max;
39 unsigned num_io_max_entries;
40 /* ~0U: not given, 0: unlimited */
41 unsigned cpu_cores;
42 unsigned memory_limit;
43 /* ~0U: not given */
44 unsigned init_type;
45 cap_value_t *capdrop;
46 unsigned num_capdrops;
47 uint32_t *tty;
48 unsigned num_ttys;
49 };
50
/* NULL-terminated array of all configured containers. */
static struct container **container;
/* Number of entries of the array, not counting the terminating NULL. */
static unsigned num_containers;
53
/* State of a running container. */
struct container_runtime {
	/* for startup communication */
	int pipe1[2];
	int pipe2[2];
	/* minor numbers of the forwarded ttys */
	uint32_t *tty;
	unsigned num_ttys;
	/*
	 * Per-tty file descriptors, indexed like tty[]. A negative entry of
	 * client[] means that no client is attached to that tty.
	 */
	int *master;
	int *slave;
	int *client;

	/* in the parent namespace */
	int init_pid;
	char *pts;
	char *root;
	char *dev;
	/* the socket on which attach and init_pid requests arrive */
	int socket_fd;
};
64
65 static char **default_dacl, **default_io_max;
66 unsigned num_default_dac_entries, num_default_io_max_entries;
67 static cap_value_t *default_capdrop;
68 unsigned num_default_capdrops;
69 uint32_t *default_tty;
70 unsigned num_default_ttys;
71 static const struct lls_command *subcmd;
72 /* does not allocate memory */
73 void m7a_log(int ll, const char* fmt,...)
74 {
75 va_list argp;
76
77 if (ll < loglevel_arg_val)
78 return;
79 va_start(argp, fmt);
80 if (subcmd == lls_cmd(LSG_MICOFORIA_CMD_START, micoforia_suite)) {
81 char str[100];
82 struct timespec t;
83 struct tm *tm;
84 assert(clock_gettime(CLOCK_REALTIME, &t) == 0);
85 tm = localtime(&t.tv_sec);
86 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
87 fprintf(stderr, "%s:%04lu ", str,
88 (long unsigned)t.tv_nsec / 1000 / 1000);
89 fprintf(stderr, "(%u) ", (unsigned)getpid());
90 }
91 vfprintf(stderr, fmt, argp);
92 va_end(argp);
93 }
94
/*
 * Log the lopsub error which corresponds to the (negative) return value of a
 * lopsub library function, prefixed by the error context, if any. The error
 * context is freed and the process is terminated via die().
 */
static void die_lopsub(int lopsub_ret, char **errctx)
{
	const char *reason = lls_strerror(-lopsub_ret);
	char *ctx = *errctx;

	if (ctx == NULL)
		ERROR_LOG("%s\n", reason);
	else
		ERROR_LOG("%s: %s\n", ctx, reason);
	free(ctx);
	*errctx = NULL;
	die("lopsub error");
}
106
/*
 * Iterate over the NULL-terminated container array. When the loop terminates
 * without break, _c is NULL.
 */
#define FOR_EACH_CONTAINER(_c) \
	for (struct container **_cp = container; ((_c) = *_cp) != NULL; _cp++)
112
113 static struct container *get_container(const char *name)
114 {
115 struct container *c;
116 FOR_EACH_CONTAINER(c) {
117 if (!strcmp(c->name, name))
118 return c;
119 }
120 return NULL;
121 }
122
123 static struct container *get_or_append_container(const char *name)
124 {
125 struct container *c = get_container(name);
126 if (c)
127 return c;
128 container = xrealloc(container,
129 (++num_containers + 1) * sizeof(struct container *));
130 c = container[num_containers - 1] = xzmalloc(sizeof(struct container));
131 c->name = xstrdup(name);
132 /* ~0U means: not given */
133 c->cpu_cores = ~0U;
134 c->memory_limit = ~0U;
135 c->init_type = ~0U;
136 container[num_containers] = NULL;
137 return c;
138 }
139
140 static unsigned get_container_ttys(const struct container *c, uint32_t **result)
141 {
142 static uint32_t dflt = {1};
143 if (c->num_ttys > 0) {
144 *result = c->tty;
145 return c->num_ttys;
146 }
147 if (num_default_ttys > 0) {
148 *result = default_tty;
149 return num_default_ttys;
150 }
151 *result = &dflt;
152 return 1;
153 }
154
/*
 * Indices into clo_given_counter[]. There is one counter for each option
 * whose values are accumulated in lists.
 */
enum clo_given_counter {
	CLOGC_DEFAULT_CGROUP_DAC = 0,
	CLOGC_CGROUP_DAC,
	CLOGC_DEFAULT_IO_MAX,
	CLOGC_IO_MAX,
	NUM_CLOGCS /* must be last */
};

/*
 * parse_options() stores here how many times each of the above options was
 * given before the config file is merged in.
 */
static unsigned clo_given_counter[NUM_CLOGCS];
164
/*
 * Convert arg with parse_cgroup_acl() and append the result to the
 * NULL-terminated list, incrementing the element count.
 */
static void append_dac_entry(const char *arg, char ***listp, unsigned *count)
{
	char *entry = parse_cgroup_acl(arg);
	unsigned idx = (*count)++;
	/* room for the new entry and the terminating NULL pointer */
	char **list = xrealloc(*listp, (idx + 2) * sizeof(char *));

	list[idx] = entry;
	list[idx + 1] = NULL;
	*listp = list;
}
173
/*
 * Append a copy of arg to the NULL-terminated list and increment the element
 * count.
 */
static void append_io_max_entry(const char *arg, char ***listp, unsigned *count)
{
	unsigned idx = (*count)++;
	/* room for the new entry and the terminating NULL pointer */
	char **list = xrealloc(*listp, (idx + 2) * sizeof(char *));

	list[idx] = xstrdup(arg);
	list[idx + 1] = NULL;
	*listp = list;
}
181
182 static void check_options(void)
183 {
184 unsigned n, m;
185 const char *arg;
186 char *name, *val;
187 struct container *c;
188 uint32_t u32;
189
190 container = xzmalloc(sizeof(struct container *));
191 /* loop backwards to let command line opts override config file opts */
192 for (n = OPT_GIVEN(MICOFORIA, CONTAINER) - 1; n != ~0U; n--) {
193 arg = OPT_STRING_VAL_N(n, MICOFORIA, CONTAINER);
194 check_name(arg);
195 get_or_append_container(arg);
196 }
197 for (n = OPT_GIVEN(MICOFORIA, PRE_START_HOOK) - 1; n != ~0U; n--) {
198 arg = OPT_STRING_VAL_N(n, MICOFORIA, PRE_START_HOOK);
199 parse_compound_arg(arg, "pre-start-hook", &name, &val);
200 c = get_or_append_container(name);
201 free(name);
202 free(c->pre_start_hook);
203 c->pre_start_hook = val;
204 }
205 for (n = OPT_GIVEN(MICOFORIA, PRE_EXEC_HOOK) - 1; n != ~0U; n--) {
206 arg = OPT_STRING_VAL_N(n, MICOFORIA, PRE_EXEC_HOOK);
207 parse_compound_arg(arg, "pre-exec-hook", &name, &val);
208 c = get_or_append_container(name);
209 free(name);
210 free(c->pre_exec_hook);
211 c->pre_exec_hook = val;
212 }
213 for (n = OPT_GIVEN(MICOFORIA, CAPDROP) - 1; n != ~0U; n--) {
214 cap_value_t cap_val;
215 arg = OPT_STRING_VAL_N(n, MICOFORIA, CAPDROP);
216 parse_compound_arg(arg, "capabilities", &name, &val);
217 c = get_or_append_container(name);
218 if (cap_from_name(val, &cap_val) < 0)
219 die_errno("%s: invalid capability: %s", name, val);
220 c->capdrop = xrealloc(c->capdrop,
221 ++c->num_capdrops * sizeof(cap_value_t));
222 c->capdrop[c->num_capdrops - 1] = cap_val;
223 free(name);
224 free(val);
225 }
226 for (n = 0; n < OPT_GIVEN(MICOFORIA, DEFAULT_CAPDROP); n++) {
227 cap_value_t cap_val;
228 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CAPDROP);
229 if (cap_from_name(arg, &cap_val) < 0)
230 die_errno("invalid default capability: %s", val);
231 default_capdrop = xrealloc(default_capdrop,
232 ++num_default_capdrops * sizeof(cap_value_t));
233 default_capdrop[num_default_capdrops - 1] = cap_val;
234 }
235 for (n = OPT_GIVEN(MICOFORIA, TTY) - 1; n != ~0U; n--) {
236 uint32_t minor;
237 arg = OPT_STRING_VAL_N(n, MICOFORIA, TTY);
238 parse_compound_arg(arg, "tty", &name, &val);
239 c = get_or_append_container(name);
240 minor = atou32(val, "tty");
241 if (minor == 0)
242 die("can not capture tty0");
243 c->tty = xrealloc(c->tty, ++c->num_ttys * sizeof(uint32_t));
244 c->tty[c->num_ttys - 1] = minor;
245 free(name);
246 free(val);
247 }
248 for (n = 0; n < OPT_GIVEN(MICOFORIA, DEFAULT_TTY); n++) {
249 uint32_t minor = OPT_UINT32_VAL_N(n, MICOFORIA, DEFAULT_TTY);
250 if (minor == 0)
251 die("can not capture tty0");
252 default_tty = xrealloc(default_tty,
253 ++num_default_ttys * sizeof(uint32_t));
254 default_tty[num_default_ttys - 1] = minor;
255 }
256
257 for (n = OPT_GIVEN(MICOFORIA, ROOT_DIRECTORY) - 1; n != ~0U ; n--) {
258 arg = OPT_STRING_VAL_N(n, MICOFORIA, ROOT_DIRECTORY);
259 parse_compound_arg(arg, "root-directory", &name, &val);
260 c = get_or_append_container(name);
261 free(name);
262 free(c->root_dir);
263 c->root_dir = val;
264 }
265 u32 = OPT_UINT32_VAL(MICOFORIA, DEFAULT_CPU_CORES);
266 check_range(u32, 0, 65536, "default-cpu-cores");
267 for (n = OPT_GIVEN(MICOFORIA, CPU_CORES) - 1; n != ~0U ; n--) {
268 arg = OPT_STRING_VAL_N(n, MICOFORIA, CPU_CORES);
269 parse_compound_arg(arg, "cpu-cores", &name, &val);
270 c = get_or_append_container(name);
271 free(name);
272 u32 = atou32(val, "cpu-cores");
273 free(val);
274 check_range(u32, 0, 65536, "cpu-cores");
275 c->cpu_cores = u32;
276 }
277 u32 = OPT_UINT32_VAL(MICOFORIA, DEFAULT_MEMORY_LIMIT);
278 check_range(u32, 0, 1024 * 1024, "default-memory-limit");
279 for (n = OPT_GIVEN(MICOFORIA, MEMORY_LIMIT) - 1; n != ~0U ; n--) {
280 arg = OPT_STRING_VAL_N(n, MICOFORIA, MEMORY_LIMIT);
281 parse_compound_arg(arg, "memory-limit", &name, &val);
282 c = get_or_append_container(name);
283 free(name);
284 u32 = atou32(val, "memory-limit");
285 free(val);
286 check_range(u32, 0, 1024 * 1024, "memory-limit");
287 c->memory_limit = u32;
288 }
289 for (n = OPT_GIVEN(MICOFORIA, INIT) - 1; n != ~0U ; n--) {
290 arg = OPT_STRING_VAL_N(n, MICOFORIA, INIT);
291 parse_compound_arg(arg, "init", &name, &val);
292 c = get_or_append_container(name);
293 free(name);
294 free(c->init);
295 c->init = val;
296 }
297 for (n = 0; n < OPT_GIVEN(MICOFORIA, NET); n++) {
298 struct ifspec *ifspec;
299 arg = OPT_STRING_VAL_N(n, MICOFORIA, NET);
300 parse_compound_arg(arg, "net", &name, &val);
301 c = get_or_append_container(name);
302 free(name);
303 c->ifspec = xrealloc(c->ifspec,
304 ++c->num_ifspecs * sizeof(struct ifspec));
305 ifspec = c->ifspec + c->num_ifspecs - 1;
306 parse_ifspec(val, &ifspec->bridge, ifspec->hwaddr);
307 free(val);
308 }
309
310 m = clo_given_counter[CLOGC_DEFAULT_CGROUP_DAC];
311 for (n = m; n < OPT_GIVEN(MICOFORIA, DEFAULT_CGROUP_DAC); n++) {
312 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CGROUP_DAC);
313 append_dac_entry(arg, &default_dacl, &num_default_dac_entries);
314 }
315 for (n = 0; n < m; n++) {
316 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_CGROUP_DAC);
317 append_dac_entry(arg, &default_dacl, &num_default_dac_entries);
318 }
319 m = clo_given_counter[CLOGC_CGROUP_DAC];
320 for (n = m; n < OPT_GIVEN(MICOFORIA, CGROUP_DAC); n++) {
321 arg = OPT_STRING_VAL_N(n, MICOFORIA, CGROUP_DAC);
322 parse_compound_arg(arg, "cgroup-dac", &name, &val);
323 c = get_or_append_container(name);
324 free(name);
325 append_dac_entry(val, &c->dacl, &c->num_dac_entries);
326 free(val);
327 }
328 for (n = 0; n < m; n++) {
329 arg = OPT_STRING_VAL_N(n, MICOFORIA, CGROUP_DAC);
330 parse_compound_arg(arg, "cgroup-dac", &name, &val);
331 c = get_or_append_container(name);
332 free(name);
333 append_dac_entry(val, &c->dacl, &c->num_dac_entries);
334 free(val);
335 }
336
337 m = clo_given_counter[CLOGC_DEFAULT_IO_MAX];
338 for (n = m; n < OPT_GIVEN(MICOFORIA, DEFAULT_IO_MAX); n++) {
339 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_IO_MAX);
340 append_io_max_entry(arg, &default_io_max, &num_default_io_max_entries);
341 }
342 for (n = 0; n < m; n++) {
343 arg = OPT_STRING_VAL_N(n, MICOFORIA, DEFAULT_IO_MAX);
344 append_io_max_entry(arg, &default_io_max, &num_default_io_max_entries);
345 }
346 m = clo_given_counter[CLOGC_IO_MAX];
347 for (n = m; n < OPT_GIVEN(MICOFORIA, IO_MAX); n++) {
348 arg = OPT_STRING_VAL_N(n, MICOFORIA, IO_MAX);
349 parse_compound_arg(arg, "io-max", &name, &val);
350 c = get_or_append_container(name);
351 free(name);
352 append_io_max_entry(val, &c->io_max, &c->num_io_max_entries);
353 free(val);
354 }
355 for (n = 0; n < m; n++) {
356 arg = OPT_STRING_VAL_N(n, MICOFORIA, IO_MAX);
357 parse_compound_arg(arg, "io-max", &name, &val);
358 c = get_or_append_container(name);
359 free(name);
360 append_io_max_entry(val, &c->io_max, &c->num_io_max_entries);
361 free(val);
362 }
363
364 /* init default c->ifspec[] */
365 FOR_EACH_CONTAINER(c) {
366 if (c->num_ifspecs == 0) {
367 const char *br = OPT_STRING_VAL(MICOFORIA, DEFAULT_BRIDGE);
368 c->num_ifspecs = 1;
369 c->ifspec = xmalloc(sizeof(struct ifspec));
370 c->ifspec[0].bridge = xstrdup(br);
371 memset(c->ifspec[0].hwaddr, 0, 6);
372 continue;
373 }
374 }
375 }
376
377 static void show_subcommand_summary(bool verbose)
378 {
379 int i;
380
381 #define LSG_MICOFORIA_CMD(_name) #_name
382 static const char * const subcommand_names[] = {LSG_MICOFORIA_SUBCOMMANDS NULL};
383 #undef LSG_MICOFORIA_CMD
384 printf("Available subcommands:\n");
385 if (verbose) {
386 const struct lls_command *cmd;
387 for (i = 1; (cmd = lls_cmd(i, micoforia_suite)); i++) {
388 const char *purpose = lls_purpose(cmd);
389 const char *name = lls_command_name(cmd);
390 printf("%-12s%s\n", name, purpose);
391 }
392 } else {
393 unsigned n = 8;
394 printf("\t");
395 for (i = 0; i < LSG_NUM_MICOFORIA_SUBCOMMANDS; i++) {
396 if (i > 0)
397 n += printf(", ");
398 if (n > 70) {
399 printf("\n\t");
400 n = 8;
401 }
402 n += printf("%s", subcommand_names[i]);
403 }
404 printf("\n");
405 }
406 }
407
408 const char *GET_VERSION(void);
409 static void handle_version_and_help(void)
410 {
411 char *help;
412
413 if (OPT_GIVEN(MICOFORIA, VERSION)) {
414 printf(PACKAGE " %s\n"
415 "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
416 "License: " LICENSE " <" LICENSE_URL ">.\n"
417 "This is free software: you are free to change and redistribute it.\n"
418 "There is NO WARRANTY, to the extent permitted by law.\n"
419 "\n"
420 "Web page: " URL "\n"
421 "Clone URL: " CLONE_URL "\n"
422 "Gitweb: " GITWEB_URL "\n"
423 "Author's Home Page: " HOME_URL "\n"
424 "Send feedback to: " AUTHOR " <" EMAIL ">\n"
425 ,
426 GET_VERSION()
427 );
428 exit(EXIT_SUCCESS);
429 }
430 if (OPT_GIVEN(MICOFORIA, DETAILED_HELP))
431 help = lls_long_help(CMD_PTR(MICOFORIA));
432 else if (OPT_GIVEN(MICOFORIA, HELP))
433 help = lls_short_help(CMD_PTR(MICOFORIA));
434 else if (lls_num_inputs(lpr) == 0) {
435 show_subcommand_summary(true /* verbose */);
436 exit(EXIT_SUCCESS);
437 } else
438 return;
439 printf("%s\n", help);
440 free(help);
441 exit(EXIT_SUCCESS);
442 }
443
444 static char *get_config_file_path(void)
445 {
446 struct passwd *pw;
447 const char *home;
448
449 if (OPT_GIVEN(MICOFORIA, CONFIG_FILE))
450 return xstrdup(OPT_STRING_VAL(MICOFORIA, CONFIG_FILE));
451 pw = getpwuid(getuid());
452 home = pw? pw->pw_dir : "/root";
453 return msg("%s/.micoforiarc", home);
454 }
455
456 static void parse_options(int argc, char **argv, const struct lls_command *cmd,
457 struct lls_parse_result **lprp)
458 {
459 int ret, fd = -1;
460 char *config_file;
461 struct stat statbuf;
462 void *map;
463 size_t sz;
464 int cf_argc;
465 char **cf_argv, *errctx = NULL;
466 const char *subcmd_name;
467 struct lls_parse_result *merged_lpr, *cf_lpr;
468
469 ret = lls_parse(argc, argv, cmd, lprp, &errctx);
470 if (ret < 0)
471 die_lopsub(ret, &errctx);
472 handle_version_and_help();
473 clo_given_counter[CLOGC_DEFAULT_CGROUP_DAC] = OPT_GIVEN(MICOFORIA,
474 DEFAULT_CGROUP_DAC);
475 clo_given_counter[CLOGC_CGROUP_DAC] = OPT_GIVEN(MICOFORIA, CGROUP_DAC);
476 clo_given_counter[CLOGC_DEFAULT_IO_MAX] =
477 OPT_GIVEN(MICOFORIA, DEFAULT_IO_MAX);
478 clo_given_counter[CLOGC_IO_MAX] = OPT_GIVEN(MICOFORIA, IO_MAX);
479 config_file = get_config_file_path();
480 ret = open(config_file, O_RDONLY);
481 if (ret < 0) {
482 if (errno != ENOENT || OPT_GIVEN(MICOFORIA, CONFIG_FILE))
483 die_errno("can not open config file %s", config_file);
484 /* no config file -- nothing to do */
485 ret = 0;
486 goto success;
487 }
488 fd = ret;
489 ret = fstat(fd, &statbuf);
490 if (ret < 0)
491 die_errno("failed to stat config file %s", config_file);
492 sz = statbuf.st_size;
493 if (sz == 0) { /* config file is empty -- nothing to do */
494 ret = 0;
495 goto success;
496 }
497 map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
498 if (map == MAP_FAILED)
499 die_errno("failed to mmap config file %s", config_file);
500 subcmd_name = (cmd == CMD_PTR(MICOFORIA))? NULL : lls_command_name(cmd);
501 ret = lls_convert_config(map, sz, subcmd_name, &cf_argv,
502 &errctx);
503 munmap(map, sz);
504 if (ret < 0) {
505 ERROR_LOG("failed to convert config file %s\n", config_file);
506 die_lopsub(ret, &errctx);
507 }
508 cf_argc = ret;
509 ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
510 lls_free_argv(cf_argv);
511 if (ret < 0)
512 die_lopsub(ret, &errctx);
513 /* command line options override config file options */
514 ret = lls_merge(*lprp, cf_lpr, cmd, &merged_lpr, &errctx);
515 if (ret < 0)
516 die_lopsub(ret, &errctx);
517 lls_free_parse_result(cf_lpr, cmd);
518 lls_free_parse_result(*lprp, cmd);
519 *lprp = merged_lpr;
520 success:
521 if (fd >= 0)
522 close(fd);
523 free(config_file);
524 }
525
526 static const char *get_pre_start_hook(const struct container *c)
527 {
528 if (c->pre_start_hook)
529 return c->pre_start_hook;
530 return OPT_STRING_VAL(MICOFORIA, DEFAULT_PRE_START_HOOK);
531 }
532
533 static const char *get_pre_exec_hook(const struct container *c)
534 {
535 if (c->pre_exec_hook)
536 return c->pre_exec_hook;
537 return OPT_STRING_VAL(MICOFORIA, DEFAULT_PRE_EXEC_HOOK);
538 }
539
540 static char *get_root_dir(const struct container *c)
541 {
542 if (c->root_dir)
543 return xstrdup(c->root_dir);
544 return msg("%s/%s", OPT_STRING_VAL(MICOFORIA, DEFAULT_ROOT_PREFIX), c->name);
545 }
546
547 static char *get_ifspec_string(const struct container *c)
548 {
549 unsigned n;
550 char *str = NULL;
551
552 assert(c->num_ifspecs > 0);
553 for (n = 0; n < c->num_ifspecs; n++) {
554 uint8_t *x = c->ifspec[n].hwaddr;
555 char *tmp = msg("%s%s%s:%02x:%02x:%02x:%02x:%02x:%02x",
556 str? str : "",
557 str? " " : "",
558 c->ifspec[n].bridge,
559 x[0], x[1], x[2], x[3], x[4], x[5]
560 );
561 free(str);
562 str = tmp;
563 }
564 return str;
565 }
566
567 static char *interface_name(const struct container *c, unsigned idx, bool peer)
568 {
569 assert(idx < c->num_ifspecs);
570 if (c->num_ifspecs == 1)
571 return peer? msg("%s-g", c->name) : xstrdup(c->name);
572 if (peer)
573 return msg("%s-%s-g", c->name, c->ifspec[idx].bridge);
574 return msg("%s-%s", c->name, c->ifspec[idx].bridge);
575 }
576
/* Export the root directory of the container as MICOFORIA_ROOT_DIR. */
static void set_m7a_root_dir_env(const struct container *c)
{
	char *root_dir = get_root_dir(c);

	DEBUG_LOG("root dir: %s\n", root_dir);
	/* setenv() copies the value, so the string can be freed right away */
	setenv("MICOFORIA_ROOT_DIR", root_dir, 1);
	free(root_dir);
}
584
585 static bool run_pre_start_hook(const struct container *c)
586 {
587 char *ifspec;
588 char *cmd = xstrdup(get_pre_start_hook(c));
589 char *argv[] = {"/bin/sh", "-c", cmd, NULL};
590 bool success;
591
592 setenv("MICOFORIA_CONTAINER_NAME", c->name, 1);
593 set_m7a_root_dir_env(c);
594
595 ifspec = get_ifspec_string(c);
596 DEBUG_LOG("ifspecs: %s\n", ifspec);
597 setenv("MICOFORIA_IFSPECS", ifspec, 1);
598 free(ifspec);
599
600 INFO_LOG("running pre-start hook %s\n", cmd);
601 success = xexec(argv, NULL);
602 free(cmd);
603 if (!success)
604 ERROR_LOG("pre-start hook failed\n");
605 unsetenv("MICOFORIA_CONTAINER_NAME");
606 unsetenv("MICOFORIA_IFSPECS");
607 unsetenv("MICOFORIA_ROOT_DIR");
608 return success;
609 }
610
611 static void run_pre_exec_hook(const struct container *c)
612 {
613 char *cmd = xstrdup(get_pre_exec_hook(c));
614 char *argv[] = {"/bin/sh", "-c", cmd, NULL};
615
616 INFO_LOG("/bin/sh -c '%s'\n", cmd);
617 set_m7a_root_dir_env(c);
618 if (!xexec(argv, NULL))
619 die("%s: pre-exec hook failed", c->name);
620 free(cmd);
621 unsetenv("MICOFORIA_ROOT_DIR");
622 }
623
/*
 * Write a string to an existing file, typically a cgroup control file. Both
 * a failure to open the file and an incomplete write are fatal.
 */
static void write_cgroup(const char *path, const char *txt)
{
	const size_t len = strlen(txt);
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		die_errno("open %s", path);
	/* a return value of -1 never equals len after the conversion */
	if ((size_t)write(fd, txt, len) != len)
		die_errno("could not write to %s", path);
	close(fd);
}
636
637 static unsigned get_dacl(const struct container *c, char ***result)
638 {
639 static char *dflt[] = {
640 "da", /* deny access to all devices except the ones below */
641 "ac 1:3 rwm", /* null */
642 "ac 1:5 rwm", /* zero */
643 "ac 1:7 rwm", /* full */
644 "ac 1:8 rwm", /* random */
645 "ac 1:9 rwm", /* urandom */
646 "ac 4:* rwm", /* tty?* */
647 "ac 5:0 rwm", /* tty */
648 "ac 5:2 rwm", /* ptmx */
649 "ac 136:* rwm", /* pts */
650 };
651 if (c->num_dac_entries > 0) {
652 *result = c->dacl;
653 return c->num_dac_entries;
654 }
655 if (num_default_dac_entries > 0) {
656 *result = default_dacl;
657 return num_default_dac_entries;
658 }
659 *result = dflt;
660 return ARRAY_SIZE(dflt);
661 }
662
663 static void apply_dacl(const struct container *c)
664 {
665 char **dacl;
666 unsigned n, num_entries;
667 char *m7a_dir, *container_dir, *allow, *deny, *procs, *txt;
668 int fd, allow_fd, deny_fd;
669 size_t sz;
670
671 m7a_dir = msg("/var/cgroup/micoforia");
672 container_dir = msg("%s/%s", m7a_dir, c->name);
673 allow = msg("%s/devices.allow", container_dir);
674 deny = msg("%s/devices.deny", container_dir);
675 procs = msg("%s/cgroup.procs", container_dir);
676
677 if (mkdir(m7a_dir, 0777) < 0 && errno != EEXIST)
678 die_errno("mkdir %s", m7a_dir);
679 free(m7a_dir);
680 if (mkdir(container_dir, 0777) < 0 && errno != EEXIST)
681 die_errno("mkdir %s", container_dir);
682 free(container_dir);
683 if ((allow_fd = open(allow, O_WRONLY)) < 0)
684 die_errno("open %s", allow);
685 free(allow);
686 if ((deny_fd = open(deny, O_WRONLY)) < 0)
687 die_errno("open %s", deny);
688 free(deny);
689
690 num_entries = get_dacl(c, &dacl);
691 INFO_LOG("applying %u entr%s\n", num_entries, num_entries == 1?
692 "y" : "ies");
693 for (n = 0; n < num_entries; n++) {
694 char *entry = dacl[n];
695 DEBUG_LOG("dac entry #%u: %s %s\n", n, dacl[n][0] == 'a'?
696 "allow" : "deny", dacl[n] + 1);
697 txt = msg("%s\n", entry + 1);
698 sz = strlen(txt);
699 fd = entry[0] == 'a'? allow_fd : deny_fd;
700 if (write(fd, txt, sz) != sz)
701 die_errno("could not write to cgroup devices.%s file",
702 entry[0] == 'a'? "allow" : "deny");
703 free(txt);
704 }
705 close(allow_fd);
706 close(deny_fd);
707 txt = msg("%u\n", (unsigned)getpid());
708 write_cgroup(procs, txt);
709 free(txt);
710 }
711
/*
 * Check that both cgroup hierarchies are mounted where we expect them and
 * enable the cpu, memory and io controllers for the micoforia cgroup (v2)
 * and its children. All errors are fatal.
 */
static void cgroup_init(void)
{
	const char controllers[] = "+cpu +memory +io\n";
	char *m7a_dir, *ctl;

	if (access("/var/cgroup/cgroup.clone_children", F_OK) < 0)
		die("cgroup v1 not mounted at /var/cgroup/");
	/* this file is checked below the v2 mount point */
	if (access("/var/cgroup2/cgroup.subtree_control", F_OK) < 0)
		die("cgroup v2 not mounted at /var/cgroup2/");
	write_cgroup("/var/cgroup2/cgroup.subtree_control", controllers);
	m7a_dir = msg("/var/cgroup2/micoforia");
	/* it is OK if the directory already exists */
	if (mkdir(m7a_dir, 0777) < 0 && errno != EEXIST)
		die_errno("mkdir %s", m7a_dir);
	ctl = msg("%s/cgroup.subtree_control", m7a_dir);
	free(m7a_dir);
	write_cgroup(ctl, controllers);
	free(ctl);
}
730
731 static void create_cgroup_v2(const struct container *c)
732 {
733 char buf[10];
734 char *ctl, *dir = msg("/var/cgroup2/micoforia/%s", c->name);
735
736 if (mkdir(dir, 0777) < 0 && errno != EEXIST)
737 die_errno("mkdir %s", dir);
738 ctl = msg("%s/cgroup.procs", dir);
739 free(dir);
740 sprintf(buf, "%u\n", (unsigned)getpid());
741 write_cgroup(ctl, buf);
742 free(ctl);
743 }
744
745 static unsigned get_cpu_cores(const struct container *c)
746 {
747 return c->cpu_cores != ~0U? c->cpu_cores :
748 OPT_UINT32_VAL(MICOFORIA, DEFAULT_CPU_CORES);
749 }
750
751 static void apply_cpu_limit(const struct container *c)
752 {
753 char *str, *ctl;
754 unsigned cores = get_cpu_cores(c);
755
756 if (cores == 0) /* unlimited */
757 return;
758 assert(cores != ~0U);
759 INFO_LOG("%u core%s\n", cores, cores == 1? "" : "s");
760 ctl = msg("/var/cgroup2/micoforia/%s/cpu.max", c->name);
761 str = msg("%u 1000000\n", 1000000 * cores);
762 write_cgroup(ctl, str);
763 free(ctl);
764 free(str);
765 }
766
767 static unsigned get_memory_limit(const struct container *c)
768 {
769 return c->memory_limit != ~0U? c->memory_limit :
770 OPT_UINT32_VAL(MICOFORIA, DEFAULT_MEMORY_LIMIT);
771 }
772
773 static void apply_memory_limit(const struct container *c)
774 {
775 char *str, *ctl;
776 unsigned gigs = get_memory_limit(c);
777
778 if (gigs == 0) /* unlimited */
779 return;
780 assert(gigs != ~0U);
781 INFO_LOG("%uG\n", gigs);
782 ctl = msg("/var/cgroup2/micoforia/%s/memory.high", c->name);
783 str = msg("%llu\n", 1024LLU * 1024LLU * 1024LLU * gigs);
784 write_cgroup(ctl, str);
785 free(ctl);
786 free(str);
787 }
788
789 static unsigned get_iospecs(const struct container *c, char ***result)
790 {
791 if (c->num_io_max_entries > 0) {
792 *result = c->dacl;
793 return c->num_io_max_entries;
794 }
795 if (num_default_io_max_entries > 0) {
796 *result = default_io_max;
797 return num_default_io_max_entries;
798 }
799 *result = NULL;
800 return 0;
801 }
802
803 static void apply_io_limit(const struct container *c)
804 {
805 unsigned n, num_entries;
806 char *io_max;
807 char **iospec;
808
809 num_entries = get_iospecs(c, &iospec);
810 if (num_entries == 0)
811 return;
812 INFO_LOG("%u entries\n", num_entries);
813 io_max = msg("/var/cgroup2/micoforia/%s/io.max", c->name);
814 for (n = 0; n < num_entries; n++)
815 write_cgroup(io_max, iospec[n]);
816 free(io_max);
817 }
818
819 static void cgroup_cleanup(const struct container *c)
820 {
821 char *dir = msg("/var/cgroup/micoforia/%s", c->name);
822 remove_subdirs_recursively(dir);
823 free(dir);
824 dir = msg("/var/cgroup2/micoforia/%s", c->name);
825 remove_subdirs_recursively(dir);
826 free(dir);
827 }
828
829 static bool setup_network(const struct container *c)
830 {
831 unsigned n;
832 char *iface, *peer;
833
834 if (!link_up("lo"))
835 WARNING_LOG("could not set establish loopback link\n");
836 for (n = 0; n < c->num_ifspecs; n++) {
837 iface = interface_name(c, n, false);
838 peer = interface_name(c, n, true);
839 link_del(iface); /* ignore errors */
840 if (!create_veth_device_pair(iface, peer))
841 goto fail;
842 if (!set_hwaddr(peer, c->ifspec[n].hwaddr))
843 goto fail;
844 if (!attach_to_bridge(iface, c->ifspec[n].bridge))
845 goto fail;
846 if (!link_up(iface))
847 goto fail;
848 free(iface);
849 free(peer);
850 }
851 return true;
852 fail:
853 free(iface);
854 free(peer);
855 return false;
856 }
857
/*
 * If fd refers to a terminal, turn off echoing, signal generation and
 * canonical mode, make read(2) return as soon as one byte is available, and
 * copy the window size of stdin to the terminal. Errors are logged but are
 * otherwise ignored.
 */
static void setup_termios(int fd)
{
	struct termios attrs;
	struct winsize size; /* see ioctl_tty(2) */

	if (!isatty(fd))
		return;
	if (tcgetattr(fd, &attrs) != 0) {
		ERROR_LOG("tcgetattr: %m\n");
		return;
	}
	attrs.c_lflag &= ~(ECHO | ISIG | ICANON);
	attrs.c_cc[VMIN] = 1;
	attrs.c_cc[VTIME] = 0;
	if (tcsetattr(fd, TCSAFLUSH, &attrs) < 0)
		ERROR_LOG("tcsetattr: %m\n");
	if (ioctl(STDIN_FILENO, TIOCGWINSZ, &size) < 0)
		return;
	ioctl(fd, TIOCSWINSZ, &size);
}
877
/* Describes one character device node to be created in the container. */
struct device_node_info {
	unsigned major;
	unsigned minor;
	/* permission bits */
	mode_t mode;
	/* file name relative to the device directory */
	const char *name;
};
883
884 static void create_standard_device_nodes(struct container_runtime *cr)
885 {
886 const struct device_node_info devices[] = {
887 {.major = 1, .minor = 3, .mode = 0666, .name = "null"},
888 {.major = 1, .minor = 5, .mode = 0666, .name = "zero"},
889 {.major = 1, .minor = 7, .mode = 0666, .name = "full"},
890 {.major = 1, .minor = 8, .mode = 0666, .name = "random"},
891 {.major = 1, .minor = 9, .mode = 0666, .name = "urandom"},
892 {.major = 4, .minor = 0, .mode = 0620, .name = "tty0"},
893 {.major = 5, .minor = 1, .mode = 0600, .name = "console"},
894 {.major = 5, .minor = 2, .mode = 0666, .name = "ptmx"},
895 };
896 unsigned n;
897
898 for (n = 0; n < ARRAY_SIZE(devices); n++) {
899 const struct device_node_info *d = devices + n;
900 char *path = msg("%s/%s", cr->dev, d->name);
901 if (mknod(path, S_IFCHR, makedev(d->major, d->minor)) < 0)
902 die_errno("mknod %s", d->name);
903 chmod(path, d->mode);
904 free(path);
905 }
906 }
907
908 static void init_console(struct container_runtime *cr)
909 {
910 char *console;
911 unsigned n;
912
913 if (mount(NULL, cr->dev, "tmpfs", 0, "size=500000,mode=755") < 0)
914 die("mount tmpfs at %s: %m", cr->dev);
915 create_standard_device_nodes(cr);
916 for (n = 0; n < cr->num_ttys; n++) {
917 char *tty = msg("%s/tty%u", cr->dev, cr->tty[n]);
918 unlink(tty);
919 if (mknod(tty, S_IFCHR, makedev(4, cr->tty[n])) < 0)
920 die("mknod %s: %m", tty);
921 chmod(tty, 0660);
922 setup_termios(cr->slave[n]);
923 INFO_LOG("bind mounting %s -> %s\n", ttyname(cr->slave[n]), tty);
924 if (mount(ttyname(cr->slave[n]), tty, "none",
925 MS_BIND | MS_PRIVATE, NULL) < 0)
926 die("failed to bind mount %s: %m\n", tty);
927 free(tty);
928 }
929 console = msg("%s/console", cr->dev);
930 if (mount(ttyname(cr->slave[0]), console, "none",
931 MS_BIND | MS_PRIVATE, NULL) < 0)
932 die("failed to bind mount %s: %m\n", console);
933 free(console);
934 }
935
936 /*
937 * These umounts fail if the container shutdown already umounted the bind
938 * mounted devices. This is not fatal, so log only with low severity.
939 */
940 static void shutdown_console(struct container_runtime *cr)
941 {
942 unsigned n;
943 char *console;
944
945 for (n = 0; n < cr->num_ttys; n++) {
946 char *tty = msg("%s/tty1", cr->dev);
947 if (umount2(tty, MNT_DETACH) < 0)
948 DEBUG_LOG("umount %s: %m\n", tty);
949 free(tty);
950 }
951 console = msg("%s/console", cr->dev);
952 if (umount2(console, MNT_DETACH) < 0)
953 DEBUG_LOG("umount %s: %m\n", console);
954 free(console);
955 }
956
/*
 * Return the socket path for the given container name. The result is
 * allocated by msg() and should be freed by the caller.
 */
static char *get_socket_path(const char *container_name)
{
	char *path = msg("micoforia/%s", container_name);

	return path;
}
961
/*
 * Read from the client fd and throw away the data. On EOF or on read errors
 * the fd is closed and *client is set to -1 to mark the client as detached.
 */
static void dispatch_client(int *client)
{
	char discard[1024];
	ssize_t nread = read(*client, discard, sizeof(discard));

	if (nread > 0)
		return;
	NOTICE_LOG("detaching client on fd %d\n", *client);
	close(*client);
	*client = -1;
}
972
973 static void dispatch_socket_request(struct container_runtime *cr)
974 {
975 uid_t uid;
976 char buf[32];
977 int cfd;
978 uint32_t minor;
979 unsigned n;
980 bool force;
981
982 memset(buf, 0, sizeof(buf));
983 if (!recv_cred_buffer(cr->socket_fd, buf, sizeof(buf) - 1, &cfd, &uid))
984 return;
985 if (uid != getuid()) {
986 const char msg[] = "\1EACCES";
987 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
988 NOTICE_LOG("access denied for uid %d\n", (int)uid);
989 goto out;
990 }
991 if (strcmp(buf, "init_pid") == 0) {
992 buf[0] = '\0';
993 memcpy(buf + 1, &cr->init_pid, sizeof(int));
994 send(cfd, buf, 1 + sizeof(int), MSG_DONTWAIT);
995 goto out;
996 }
997 if (sscanf(buf, "attach %u", &minor) == 1) {
998 force = false;
999 } else if (sscanf(buf, "force-attach %u", &minor) == 1) {
1000 force = true;
1001 } else {
1002 const char msg[] = "\1EINVAL";
1003 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1004 NOTICE_LOG("invalid request: %s\n", buf);
1005 goto out;
1006 }
1007 for (n = 0; n < cr->num_ttys; n++) {
1008 INFO_LOG("n: %u, tty[n]: %u\n", n, cr->tty[n]);
1009 if (cr->tty[n] == minor)
1010 break;
1011 }
1012 if (n == cr->num_ttys) {
1013 const char msg[] = "\1ENOTTY";
1014 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1015 NOTICE_LOG("tty%u is not being forwarded\n", minor);
1016 goto out;
1017 }
1018 if (cr->client[n] >= 0) {
1019 if (force) {
1020 close(cr->client[n]);
1021 cr->client[n] = -1;
1022 } else {
1023 const char msg[] = "\1EBUSY";
1024 send(cfd, msg, sizeof(msg), MSG_DONTWAIT);
1025 ERROR_LOG("tty%u is already in use\n", minor);
1026 goto out;
1027 }
1028 }
1029 if (!pass_fd(cr->master[n], cfd)) {
1030 ERROR_LOG("could not pass master fd\n");
1031 goto out;
1032 }
1033 NOTICE_LOG("attached client on fd %d to tty%u\n", cfd, minor);
1034 cr->client[n] = cfd;
1035 return;
1036 out:
1037 close(cfd);
1038 }
1039
/*
 * Read up to 1024 bytes from src and write them to dst. The read is restarted
 * if it gets interrupted by a signal.
 *
 * Returns false on EOF, on read errors, on write errors and on short writes.
 * Discards the data which was read if dst < 0, returning true in this case.
 */
static bool copy(int src, int dst)
{
	char buf[1024];
	ssize_t nread, nwritten;

	do
		nread = read(src, buf, sizeof(buf));
	while (nread < 0 && errno == EINTR);
	if (nread < 0)
		DEBUG_LOG("read from fd %d: %m\n", src);
	if (nread <= 0)
		return false;
	if (dst < 0)
		return true;
	nwritten = write(dst, buf, nread);
	if (nwritten < 0) {
		DEBUG_LOG("write to fd %d: %m\n", dst);
		return false;
	}
	if (nwritten != nread) {
		DEBUG_LOG("short write to fd %d\n", dst);
		return false;
	}
	return true;
}
1067
1068 /*
1069 * The function returns only when the process receives SIGCHLD. In this case
1070 * the return value is 0 for success, 1 for failure, and 2 if the child's exit
1071 * code indicates a reboot request. Other signals are pushed down to the child
1072 * process.
1073 */
1074 static int parent_loop(pid_t pid, const struct container *c,
1075 struct container_runtime *cr)
1076 {
1077 unsigned n;
1078
1079 init_signal_handling();
1080 for (;;) {
1081 int sig, max_fileno = 0;
1082 fd_set fds;
1083
1084 FD_ZERO(&fds);
1085 if (OPT_GIVEN(START, FOREGROUND)) {
1086 FD_SET(STDIN_FILENO, &fds);
1087 if (STDIN_FILENO > max_fileno)
1088 max_fileno = STDIN_FILENO;
1089 }
1090 FD_SET(signal_pipe[0], &fds);
1091 if (signal_pipe[0] > max_fileno)
1092 max_fileno = signal_pipe[0];
1093 FD_SET(cr->socket_fd, &fds);
1094 if (cr->socket_fd > max_fileno)
1095 max_fileno = cr->socket_fd;
1096 for (n = 0; n < cr->num_ttys; n++) {
1097 if (cr->client[n] >= 0) { /* detached */
1098 FD_SET(cr->client[n], &fds);
1099 if (cr->client[n] > max_fileno)
1100 max_fileno = cr->client[n];
1101 } else {
1102 FD_SET(cr->master[n], &fds);
1103 if (cr->master[n] > max_fileno)
1104 max_fileno = cr->master[n];
1105 }
1106 }
1107 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1108 if (errno != EINTR)
1109 ERROR_LOG("select: %m\n");
1110 continue;
1111 }
1112 do {
1113 if (!FD_ISSET(signal_pipe[0], &fds))
1114 break;
1115 sig = next_signal();
1116 if (sig == SIGCHLD) {
1117 int wstatus;
1118 if (waitpid(pid, &wstatus, WNOHANG) < 0) {
1119 WARNING_LOG("wait: %m\n");
1120 break;
1121 }
1122 cgroup_cleanup(c);
1123 if (!WIFEXITED(wstatus))
1124 return 1;
1125 if (WEXITSTATUS(wstatus) == 2)
1126 return 2;
1127 return WEXITSTATUS(wstatus) != EXIT_SUCCESS;
1128 }
1129 kill(pid, sig);
1130 } while (0);
1131 if (FD_ISSET(cr->socket_fd, &fds))
1132 dispatch_socket_request(cr);
1133 for (n = 0; n < cr->num_ttys; n++) {
1134 if (cr->client[n] >= 0) {
1135 if FD_ISSET(cr->client[n], &fds)
1136 dispatch_client(cr->client + n);
1137 } else { /* stdout is /dev/null in background mode */
1138 if (FD_ISSET(cr->master[n], &fds))
1139 copy(cr->master[n], n == 0?
1140 STDOUT_FILENO : -1);
1141 }
1142 }
1143 if (OPT_GIVEN(START, FOREGROUND)) {
1144 if (FD_ISSET(STDIN_FILENO, &fds))
1145 copy(STDIN_FILENO, cr->master[0]);
1146 }
1147 }
1148 }
1149
1150 /* Set net namespace of child and call parent_loop(). */
1151 static int run_parent(pid_t child_pid, const struct container *c,
1152 struct container_runtime *cr)
1153 {
1154 unsigned n;
1155 bool success;
1156
1157 close(cr->pipe1[1]);
1158 close(cr->pipe2[0]);
1159 if (read(cr->pipe1[0], &cr->init_pid, 4) != 4) {
1160 ERROR_LOG("pipe1 read error\n");
1161 close(cr->pipe1[0]);
1162 close(cr->pipe2[1]);
1163 return false;
1164 }
1165 INFO_LOG("received grand child pid: %u\n", (unsigned)cr->init_pid);
1166 close(cr->pipe1[0]);
1167 for (n = 0; n < c->num_ifspecs; n++) {
1168 char *peer = interface_name(c, n, true);
1169 success = set_netns(peer, child_pid);
1170 free(peer);
1171 if (!success) {
1172 ERROR_LOG("set_netns error\n");
1173 close(cr->pipe2[1]);
1174 return false;
1175 }
1176 }
1177 success = write(cr->pipe2[1], "\0", 1) == 1;
1178 close(cr->pipe2[1]);
1179 if (!success) {
1180 ERROR_LOG("pipe2 write error\n");
1181 return false;
1182 }
1183 return parent_loop(child_pid, c, cr);
1184 }
1185
1186 static unsigned get_capdrops(const struct container *c, cap_value_t **result)
1187 {
1188 static cap_value_t builtin_capdrop[] = {CAP_SYS_MODULE, CAP_SYS_TIME,
1189 CAP_SYS_RESOURCE};
1190
1191 if (c->capdrop) {
1192 *result = c->capdrop;
1193 return c->num_capdrops;
1194 }
1195 if (OPT_GIVEN(MICOFORIA, DEFAULT_CAPDROP)) {
1196 *result = default_capdrop;
1197 return num_default_capdrops;
1198 }
1199 *result = builtin_capdrop;
1200 return ARRAY_SIZE(builtin_capdrop);
1201 }
1202
1203 static void drop_caps(const struct container *c)
1204 {
1205 cap_value_t *capdrop;
1206 unsigned n, num_capdrops;
1207
1208 INFO_LOG("lowering bounding set capabilities\n");
1209 num_capdrops = get_capdrops(c, &capdrop);
1210 for (n = 0; n < num_capdrops; n++) {
1211 char *name = cap_to_name(capdrop[n]);
1212 DEBUG_LOG("dropping %s\n", name);
1213 cap_free(name);
1214 if (cap_drop_bound(capdrop[n]) < 0)
1215 die_errno("cap_drop_bound");
1216 }
1217 }
1218
1219 __attribute ((noreturn))
1220 static void child_loop(pid_t pid, struct container_runtime *cr)
1221 {
1222 int wstatus;
1223
1224 INFO_LOG("parent: %u, child: %u, init: %u\n", (unsigned) getppid(),
1225 (unsigned)getpid(), (unsigned)pid);
1226 init_signal_handling();
1227 setsid();
1228
1229 for (;;) {
1230 int max_fileno = 0;
1231 fd_set fds;
1232
1233 FD_ZERO(&fds);
1234 FD_SET(signal_pipe[0], &fds);
1235 if (signal_pipe[0] > max_fileno)
1236 max_fileno = signal_pipe[0];
1237 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1238 if (errno != EINTR)
1239 ERROR_LOG("select: %m\n");
1240 continue;
1241 }
1242 do { if (FD_ISSET(signal_pipe[0], &fds)) {
1243 int sig = next_signal();
1244 if (sig == SIGCHLD) {
1245 if (waitpid(pid, &wstatus, WNOHANG) < 0) {
1246 WARNING_LOG("wait: %m\n");
1247 break;
1248 }
1249 shutdown_console(cr);
1250 if (WIFSIGNALED(wstatus) &&
1251 WTERMSIG(wstatus) == 1) {
1252 NOTICE_LOG("reboot requested\n");
1253 exit(2);
1254 }
1255 NOTICE_LOG("container terminated\n");
1256 exit(EXIT_SUCCESS);
1257 }
1258 NOTICE_LOG("sending signal %d to container init\n",
1259 sig);
1260 kill(pid, sig == SIGINT? SIGINT : SIGKILL);
1261 }} while(0);
1262 }
1263 }
1264
1265 static const char *get_init_path(const struct container *c)
1266 {
1267 return c->init? c->init : OPT_STRING_VAL(MICOFORIA, DEFAULT_INIT);
1268 }
1269
1270 /*
1271 * The child process unshares namespaces, spawns the init process which runs
1272 * the pre-exec hook and executes the container init process. This function
1273 * never returns, but both the child and the init process exit when the
1274 * container terminates. The exit code of the child tells the parent whether
1275 * it should restart the container.
1276 */
1277 __attribute ((noreturn))
1278 static void run_child(const struct container *c, struct container_runtime *cr)
1279 {
1280 unsigned n;
1281 char *init, *put_old;
1282 char ch;
1283 pid_t pid;
1284
1285 close(cr->socket_fd);
1286 for (n = 0; n < cr->num_ttys; n++)
1287 close(cr->master[n]);
1288 close(cr->pipe1[0]);
1289 close(cr->pipe2[1]);
1290 if (unshare(CLONE_NEWNET) < 0)
1291 die_errno("unshare net ns\n");
1292 if (unshare(CLONE_NEWPID) < 0)
1293 die_errno("unshare pid ns\n");
1294 /* fork again to become pid 1 in the new pid namespace */
1295 if ((pid = fork()) < 0)
1296 die_errno("fork");
1297 /*
1298 * By writing to pipe1 we tell the parent (a) we've unshared the net
1299 * namespace, and (b) the pid of the init process in the parent
1300 * namespace.
1301 */
1302 if (pid > 0) {
1303 close(cr->pipe2[0]);
1304 if (write(cr->pipe1[1], (const char *)&pid, 4) != 4)
1305 die_errno("pipe write error");
1306 close(cr->pipe1[1]);
1307 child_loop(pid, cr); /* never returns */
1308 }
1309 pid = getpid();
1310 DEBUG_LOG("now running as pid %d\n", pid);
1311 if (read(cr->pipe2[0], &ch, 1) != 1)
1312 die_errno("pipe read error");
1313 close(cr->pipe1[1]);
1314 close(cr->pipe2[0]);
1315 if (unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS) < 0)
1316 die_errno("unshare");
1317 mkdir(cr->dev, 0777);
1318 init_console(cr);
1319 for (n = 0; n < cr->num_ttys; n++)
1320 close(cr->slave[n]);
1321 INFO_LOG("setting hostname to %s\n", c->name);
1322 if (sethostname(c->name, strlen(c->name)) < 0)
1323 die_errno("sethostname error");
1324 if (chdir(cr->root) < 0)
1325 die_errno("chdir %s", cr->root);
1326 drop_caps(c);
1327 apply_dacl(c);
1328 apply_cpu_limit(c);
1329 apply_memory_limit(c);
1330 apply_io_limit(c);
1331 for (n = 0; n < c->num_ifspecs; n++) {
1332 char *peer = interface_name(c, n, true);
1333 char *renamed = msg("eth%u", n);
1334 if (!rename_interface(peer, renamed))
1335 die("can not rename %s to %s\n", peer, renamed);
1336 free(peer);
1337 free(renamed);
1338 }
1339 run_pre_exec_hook(c);
1340 setup_termios(STDIN_FILENO);
1341 put_old = msg("%s/mnt", cr->root);
1342 /* glibc does not provide a wrapper for pivot_root */
1343 if (syscall(SYS_pivot_root, ".", put_old) < 0)
1344 die_errno("pivot_root (put_old: %s)", put_old);
1345 if (umount2("/mnt", MNT_DETACH) < 0)
1346 die_errno("umount %s", put_old);
1347 free(put_old);
1348 close(STDIN_FILENO);
1349 init = xstrdup(get_init_path(c));
1350 INFO_LOG("handing over control to container init: %s\n", init);
1351 execve(init, (char *[]){init, NULL}, NULL);
1352 die_errno("failed to exec init process %s", c->init);
1353 }
1354
1355 /*
1356 * We need three processes, called parent, child, init, because we want one
1357 * process run with namespaces unmodified, requiring one fork. After the child
1358 * has unshared its PID namespace, it keeps its old PID, so we need to fork
1359 * again to get pid 1. The child can not terminate because the parent can not
1360 * wait(2) on its grandchild.
1361 */
1362 static bool exec_container(const struct container *c)
1363 {
1364 bool success;
1365 pid_t pid;
1366 unsigned n;
1367 struct container_runtime cr = {0};
1368 char *socket_path;
1369 int ret;
1370
1371 create_cgroup_v2(c);
1372 socket_path = get_socket_path(c->name);
1373 success = listen_on_unix_socket(socket_path, &cr.socket_fd);
1374 if (!success)
1375 ERROR_LOG("can not listen on unix socket %s\n", socket_path);
1376 free(socket_path);
1377 if (!success)
1378 return 1;
1379 cr.root = get_root_dir(c);
1380 cr.dev = msg("%s/dev", cr.root);
1381 cr.pts = realpath("/proc/self/fd/0", NULL);
1382 DEBUG_LOG("pts: %s\n", cr.pts);
1383 cr.num_ttys = get_container_ttys(c, &cr.tty);
1384 cr.master = xmalloc(cr.num_ttys * sizeof(int));
1385 cr.slave = xmalloc(cr.num_ttys * sizeof(int));
1386 cr.client = xmalloc(cr.num_ttys * sizeof(int));
1387 for (n = 0; n < cr.num_ttys; n++)
1388 cr.client[n] = -1;
1389 reboot:
1390 NOTICE_LOG("starting %s\n", c->name);
1391 for (n = 0; n < cr.num_ttys; n++) {
1392 if (openpty(cr.master + n, cr.slave + n, NULL, NULL, NULL) < 0)
1393 die("openpty: %m");
1394 DEBUG_LOG("pty (tty%u <-> %s)\n", n, ttyname(cr.slave[n]));
1395 }
1396 /* mount rw, ignore errors */
1397 mount(NULL, cr.root, NULL, MS_REMOUNT, NULL);
1398 if (!setup_network(c))
1399 return false;
1400 if (!run_pre_start_hook(c))
1401 return false;
1402 if (pipe(cr.pipe1) < 0) /* child -> parent */
1403 die_errno("pipe1");
1404 if (pipe(cr.pipe2) < 0)
1405 die_errno("pipe2"); /* parent -> child */
1406 if ((pid = fork()) < 0)
1407 die_errno("fork");
1408 if (pid == 0)
1409 run_child(c, &cr); /* never returns */
1410 ret = run_parent(pid, c, &cr);
1411 if (ret != 2)
1412 return ret == 0;
1413 NOTICE_LOG("rebooting\n");
1414 for (n = 0; n < cr.num_ttys; n++) {
1415 close(cr.master[n]);
1416 close(cr.slave[n]);
1417 }
1418 goto reboot;
1419 }
1420
1421 static char *get_container_logfile(const char *name)
1422 {
1423 return msg("%s/%s", OPT_STRING_VAL(MICOFORIA, LOGDIR), name);
1424 }
1425
1426 static bool start_container(const struct container *c)
1427 {
1428 pid_t pid;
1429 char *logfile;
1430 struct termios tios;
1431 bool success;
1432
1433 if (is_locked(c->name, &pid)) {
1434 ERROR_LOG("%s is locked by pid %u\n", c->name, (unsigned)pid);
1435 return false;
1436 }
1437 if (OPT_GIVEN(START, FOREGROUND)) {
1438 if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) {
1439 ERROR_LOG("both stdin and stdout must be terminals\n");
1440 return false;
1441 }
1442 if (tcgetattr(STDIN_FILENO, &tios) < 0) {
1443 ERROR_LOG("tcgetattr: %m\n");
1444 return false;
1445 }
1446 } else {
1447 if ((pid = fork()) < 0)
1448 die_errno("fork");
1449 if (pid > 0)
1450 return true;
1451 logfile = get_container_logfile(c->name);
1452 daemonize(logfile);
1453 free(logfile);
1454 }
1455 if (!try_lock(c->name, &pid))
1456 die("%s is locked by pid %u", c->name, (unsigned)pid);
1457 success = exec_container(c);
1458 if (OPT_GIVEN(START, FOREGROUND)) {
1459 if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios) < 0)
1460 ERROR_LOG("tcsetattr: %m\n");
1461 }
1462 exit(success? EXIT_SUCCESS : EXIT_FAILURE);
1463 }
1464
1465 static void check_container_args(void)
1466 {
1467 unsigned n, num_inputs;
1468 struct container *c;
1469
1470 num_inputs = lls_num_inputs(sublpr);
1471 if (num_inputs == 0) {
1472 if (num_containers == 0)
1473 die("no container configured\n");
1474 if (OPT_GIVEN(START, FOREGROUND) && num_containers > 1)
1475 die("must specify container for foreground mode");
1476 } else {
1477 if (OPT_GIVEN(START, FOREGROUND) && num_inputs > 1)
1478 die("can start only one container in foreground mode");
1479 for (n = 0; n < num_inputs; n++) {
1480 const char *name = lls_input(n, sublpr);
1481 c = get_container(name);
1482 if (!c)
1483 die("container not configured: %s", name);
1484 }
1485 }
1486 }
1487
/*
 * Iterator state for cai_next(): the index of the next container or command
 * line argument to look at.
 */
struct container_arg_iter {
	unsigned int idx;
};

/* Initializer which starts at the first element. The argument is unused. */
#define INITIALIZED_CAI(_cai) { 0 }
1493
1494 static struct container *cai_next(struct container_arg_iter *cai, bool *skipped)
1495 {
1496 unsigned num_inputs = lls_num_inputs(sublpr);
1497
1498 if (skipped)
1499 *skipped = false;
1500 if (num_inputs == 0) {
1501 if (cai->idx >= num_containers)
1502 return NULL;
1503 return container[cai->idx++];
1504 }
1505 for (; cai->idx < num_inputs; cai->idx++) {
1506 const char *name = lls_input(cai->idx, sublpr);
1507 struct container *c = get_container(name);
1508 if (!c) {
1509 ERROR_LOG("%s: not configured\n", name);
1510 if (skipped)
1511 *skipped = true;
1512 continue;
1513 }
1514 cai->idx++;
1515 return c;
1516 }
1517 return NULL;
1518 }
1519
1520 static bool for_each_container_arg(bool (*f)(const struct container *c))
1521 {
1522 struct container *c;
1523 bool success = true;
1524 bool skipped;
1525 struct container_arg_iter cai = INITIALIZED_CAI(cai);
1526
1527 while ((c = cai_next(&cai, &skipped)))
1528 if (!f(c) || skipped)
1529 success = false;
1530 return success;
1531 }
1532
1533 static bool com_start(void)
1534 {
1535 const char *logdir = OPT_STRING_VAL(MICOFORIA, LOGDIR);
1536
1537 check_container_args();
1538 if (logdir[0] == '\0')
1539 die_empty_arg("loggir");
1540 cgroup_init();
1541 if (mkdir(logdir, 0777) < 0 && errno != EEXIST)
1542 die_errno("mkdir %s", logdir);
1543 return for_each_container_arg(start_container);
1544 }
1545 EXPORT_CMD_HANDLER(start);
1546
1547 static bool send_signal_to_container(int signum, const struct container *c)
1548 {
1549 pid_t pid;
1550 bool success;
1551
1552 if (!is_locked(c->name, &pid)) {
1553 INFO_LOG("%s is not running\n", c->name);
1554 return false;
1555 }
1556 DEBUG_LOG("sending signal %d to pid %u\n", signum, (unsigned)pid);
1557 success = kill(pid, signum) >= 0;
1558 if (!success)
1559 ERROR_LOG("kill %s: %m\n", c->name);
1560 return success;
1561 }
1562
/*
 * Replace the environment by a minimal one for root.
 *
 * Only TERM is carried over from the old environment. PATH, USER, LOGNAME
 * and HOME are set to fixed values.
 */
static void clean_env(void)
{
	char *term = getenv("TERM");

	/*
	 * The pointer returned by getenv() refers to the environment we are
	 * about to clear, so work on a private copy.
	 */
	if (term)
		term = xstrdup(term);
	clearenv();
	if (term)
		setenv("TERM", term, 0);
	free(term);
	setenv("PATH", "/root/bin:/usr/local/sbin:/usr/local/bin"
		":/sbin:/usr/sbin:/bin:/usr/bin", 0);
	setenv("USER", "root", 0);
	setenv("LOGNAME", "root", 0);
	setenv("HOME", "/root", 0);
}
1576
/*
 * Send an "init_pid" request to the unix socket of the named container.
 *
 * *result is set to -1 before the request is sent and receives the answer
 * through request_int(). Returns true on success. Failures are logged.
 */
static bool request_init_pid(const char *name, int *result)
{
	char *path;
	bool ok;

	*result = -1;
	path = get_socket_path(name);
	ok = request_int(path, "init_pid", result);
	free(path);
	if (ok)
		return true;
	ERROR_LOG("could not determine init pid of %s\n", name);
	return false;
}
1589
1590 static bool shutdown_container(const struct container *c)
1591 {
1592 pid_t pid;
1593 char str[20];
1594 char *argv[] = {"nsenter", "-w", "-a", "-r", "-t", str, "halt", NULL};
1595
1596 if (!is_locked(c->name, NULL)) {
1597 if (lls_num_inputs(sublpr) == 0)
1598 return true;
1599 ERROR_LOG("container not running: %s\n", c->name);
1600 return false;
1601 }
1602 pid = fork();
1603 if (pid < 0)
1604 return false;
1605 if (pid > 0)
1606 return true;
1607 if (!request_init_pid(c->name, &pid))
1608 _exit(EXIT_FAILURE);
1609 sprintf(str, "%d", pid);
1610 clean_env();
1611 execvp(argv[0], argv);
1612 _exit(EXIT_FAILURE);
1613 }
1614
1615 static bool container_is_dead(const struct container *c)
1616 {
1617 return !is_locked(c->name, NULL);
1618 }
1619
1620 static bool wait_for_containers_to_die(void)
1621 {
1622 bool success;
1623 unsigned ms = 32;
1624 struct timespec ts;
1625
1626 while (ms < 20000) {
1627 ts.tv_sec = ms / 1000;
1628 ts.tv_nsec = (ms % 1000) * 1000 * 1000;
1629 if (nanosleep(&ts, NULL) < 0)
1630 return false;
1631 success = for_each_container_arg(container_is_dead);
1632 if (success)
1633 return true;
1634 ms *= 2;
1635 }
1636 return false;
1637 }
1638
1639 static bool com_stop(void)
1640 {
1641 bool success = for_each_container_arg(shutdown_container);
1642
1643 if (!success)
1644 return false;
1645 if (!OPT_GIVEN(STOP, WAIT))
1646 return true;
1647 return wait_for_containers_to_die();
1648 }
1649 EXPORT_CMD_HANDLER(stop);
1650
/* Request a reboot by sending SIGINT to the process which runs the container. */
static bool reboot_container(const struct container *c)
{
	const int signum = SIGINT;

	return send_signal_to_container(signum, c);
}
1655
1656 static bool com_reboot(void)
1657 {
1658 return for_each_container_arg(reboot_container);
1659 }
1660 EXPORT_CMD_HANDLER(reboot);
1661
/* Kill a container by sending SIGUSR1 to the process which runs it. */
static bool kill_container(const struct container *c)
{
	const int signum = SIGUSR1;

	return send_signal_to_container(signum, c);
}
1666
1667 static bool com_kill(void)
1668 {
1669 bool success = for_each_container_arg(kill_container);
1670
1671 if (!success)
1672 return false;
1673 if (!OPT_GIVEN(KILL, WAIT))
1674 return true;
1675 return wait_for_containers_to_die();
1676 }
1677 EXPORT_CMD_HANDLER(kill);
1678
1679 static void list_container_verbose(const struct container *c)
1680 {
1681 char *root;
1682 unsigned n, N;
1683 char **word_list;
1684 cap_value_t *capdrop;
1685 uint32_t *tty;
1686 char cores_str[25] = "unlimited";
1687 unsigned cores = get_cpu_cores(c);
1688
1689 printf("%s:\n", c->name);
1690 printf("\tpre-start hook: %s\n", get_pre_start_hook(c));
1691 printf("\tpre-exec hook: %s\n", get_pre_exec_hook(c));
1692 root = get_root_dir(c);
1693 printf("\troot dir: %s\n", root);
1694 free(root);
1695 printf("\tinit path: %s\n", get_init_path(c));
1696 for (n = 0; n < c->num_ifspecs; n++) {
1697 char pretty_hwaddr[18];
1698 char *iface = interface_name(c, n, false);
1699 pretty_print_hwaddr(c->ifspec[n].hwaddr, pretty_hwaddr);
1700 printf("\tinterface #%u: %s (%s)\n", n, iface, pretty_hwaddr);
1701 free(iface);
1702 }
1703 N = get_dacl(c, &word_list);
1704 for (n = 0; n < N; n++)
1705 printf("\tdac entry #%u: %s %s\n", n, word_list[n][0] == 'a'?
1706 "allow" : "deny", word_list[n] + 1);
1707 N = get_iospecs(c, &word_list);
1708 for (n = 0; n < N; n++)
1709 printf("\tiospec #%u: %s\n", n, word_list[n]);
1710 if (cores > 0)
1711 sprintf(cores_str, "%u", cores);
1712 printf("\tCPU core limit: %s\n", cores_str);
1713 printf("\tmemory limit: %uG\n", get_memory_limit(c));
1714 N = get_capdrops(c, &capdrop);
1715 for (n = 0; n < N; n++)
1716 printf("\tcapdrop #%u: %s\n", n, cap_to_name(capdrop[n]));
1717 N = get_container_ttys(c, &tty);
1718 for (n = 0; n < N; n++)
1719 printf("\ttty #%u: %u\n", n, tty[n]);
1720 }
1721
1722 static bool com_ls(void)
1723 {
1724 struct container *c;
1725 bool skipped, success = true;
1726 struct container_arg_iter cai = INITIALIZED_CAI(cai);
1727
1728 while ((c = cai_next(&cai, &skipped))) {
1729 pid_t pid;
1730 if (skipped)
1731 success = false;
1732 if (!is_locked(c->name, &pid)) {
1733 if (!OPT_GIVEN(LS, ALL)) {
1734 success =false;
1735 continue;
1736 }
1737 pid = 0;
1738 }
1739 if (OPT_GIVEN(LS, VERBOSE)) {
1740 list_container_verbose(c);
1741 continue;
1742 }
1743 if (OPT_GIVEN(LS, LONG)) {
1744 if (pid > 0)
1745 printf("%u\t", (unsigned)pid);
1746 else
1747 printf("-\t");
1748 printf("%u\t", get_cpu_cores(c));
1749 printf("%uG\t", get_memory_limit(c));
1750 printf("%s\n", c->name);
1751 continue;
1752 }
1753 if (!OPT_GIVEN(LS, QUIET))
1754 printf("%s\n", c->name);
1755 }
1756 if (skipped) /* needed if the last given container arg is invalid */
1757 success = false;
1758 return success;
1759 }
1760 EXPORT_CMD_HANDLER(ls);
1761
1762 static bool list_container_processes(const struct container *c)
1763 {
1764 int pid;
1765 char str[20];
1766 char *argv[] = {"pstree", "-anp", str, NULL};
1767 bool success;
1768
1769 success = is_locked(c->name, &pid);
1770 if (!success) {
1771 if (lls_num_inputs(sublpr) == 0)
1772 return true;
1773 ERROR_LOG("container \"%s\" is not running\n", c->name);
1774 return false;
1775 }
1776 if (!OPT_GIVEN(PS, ALL) && !request_init_pid(c->name, &pid))
1777 return false;
1778 sprintf(str, "%d", pid);
1779 success = xexec(argv, NULL);
1780 return success;
1781 }
1782
1783 static bool com_ps(void)
1784 {
1785 return for_each_container_arg(list_container_processes);
1786 }
1787 EXPORT_CMD_HANDLER(ps);
1788
1789 static bool com_attach(void)
1790 {
1791 char *errctx;
1792 const char *arg;
1793 pid_t pid;
1794 char *socket_path;
1795 int master, ret, socket_fd;
1796 bool have_escape = false;
1797 struct termios tios;
1798 uint32_t minor = OPT_UINT32_VAL(ATTACH, TTY);
1799 char *rq;
1800
1801 if (!isatty(STDIN_FILENO) || !isatty(STDOUT_FILENO)) {
1802 ERROR_LOG("both stdin and stdout must be terminals\n");
1803 return false;
1804 }
1805 if (tcgetattr(STDIN_FILENO, &tios) < 0)
1806 die_errno("tcgetattr");
1807 ret = lls_check_arg_count(sublpr, 1, 1, &errctx);
1808 if (ret < 0)
1809 die_lopsub(ret, &errctx);
1810 arg = lls_input(0, sublpr);
1811 if (!is_locked(arg, &pid)) {
1812 ERROR_LOG("container not running: %s\n", arg);
1813 return false;
1814 }
1815 socket_path = get_socket_path(arg);
1816 if (OPT_GIVEN(ATTACH, FORCE))
1817 rq = msg("force-attach %u", minor);
1818 else
1819 rq = msg("attach %u", minor);
1820 socket_fd = request_fd(socket_path, rq, &master);
1821 free(rq);
1822 free(socket_path);
1823 INFO_LOG("Attached to /dev/tty%u of container %s\n", minor, arg);
1824 NOTICE_LOG("Type CTRL+a q to quit\n");
1825 setup_termios(STDIN_FILENO);
1826 setup_termios(master);
1827 for (;;) {
1828 int max_fileno = 0;
1829 fd_set fds;
1830 FD_ZERO(&fds);
1831 FD_SET(STDIN_FILENO, &fds);
1832 if (STDIN_FILENO > max_fileno)
1833 max_fileno = STDIN_FILENO;
1834 FD_SET(master, &fds);
1835 if (master > max_fileno)
1836 max_fileno = master;
1837 FD_SET(socket_fd, &fds);
1838 if (socket_fd > max_fileno)
1839 max_fileno = socket_fd;
1840 if (select(max_fileno + 1, &fds, NULL, NULL, NULL) < 0) {
1841 if (errno != EINTR)
1842 ERROR_LOG("select: %m\n");
1843 continue;
1844 }
1845 if (FD_ISSET(socket_fd, &fds))
1846 break;
1847 if (FD_ISSET(STDIN_FILENO, &fds)) {
1848 char c;
1849 if (read(STDIN_FILENO, &c, 1) <= 0)
1850 break;
1851 if (c == 1 && !have_escape)
1852 have_escape = true;
1853 else if (c == 'q' && have_escape)
1854 break;
1855 else if (write(master, &c, 1) != 1)
1856 break;
1857 }
1858 if (FD_ISSET(master, &fds)) {
1859 if (!copy(master, STDOUT_FILENO))
1860 break;
1861 }
1862 }
1863 if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &tios) < 0)
1864 ERROR_LOG("tcsetattr: %m\n");
1865 printf("\n");
1866 return false;
1867 }
1868 EXPORT_CMD_HANDLER(attach);
1869
1870 static bool com_help(void)
1871 {
1872 int ret;
1873 char *errctx, *help;
1874 const char *arg;
1875 const struct lls_command *cmd;
1876
1877 ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
1878 if (ret < 0)
1879 die_lopsub(ret, &errctx);
1880 if (lls_num_inputs(sublpr) == 0) {
1881 show_subcommand_summary(OPT_GIVEN(HELP, LONG));
1882 return true;
1883 }
1884 arg = lls_input(0, sublpr);
1885 ret = lls_lookup_subcmd(arg, micoforia_suite, &errctx);
1886 if (ret < 0)
1887 die_lopsub(ret, &errctx);
1888 cmd = lls_cmd(ret, micoforia_suite);
1889 if (OPT_GIVEN(HELP, LONG))
1890 help = lls_long_help(cmd);
1891 else
1892 help = lls_short_help(cmd);
1893 printf("%s\n", help);
1894 free(help);
1895 return true;
1896 }
1897 EXPORT_CMD_HANDLER(help);
1898
/*
 * Handler of the configtest subcommand. The function itself only prints a
 * confirmation message and succeeds.
 */
static bool com_configtest(void)
{
	puts("Syntax Ok");
	return true;
}
EXPORT_CMD_HANDLER(configtest);
1905
/*
 * Handler of the edit subcommand: open the configuration file in the editor
 * named by the EDITOR environment variable, or in vi if EDITOR is unset.
 */
static bool com_edit(void)
{
	char *editor = getenv("EDITOR"); /* must not be freed */
	char *conf = get_config_file_path();
	char *argv[3];
	bool success;

	argv[0] = editor? editor : "vi";
	argv[1] = conf;
	argv[2] = NULL;
	success = xexec(argv, NULL);
	free(conf);
	return success;
}
EXPORT_CMD_HANDLER(edit);
1917
1918 static bool com_enter(void)
1919 {
1920 char str[20];
1921 char **argv;
1922 char *nsenter_args[] = {"nsenter", "-w", "-a", "-r", "-t"};
1923 const unsigned nna = ARRAY_SIZE(nsenter_args); /* num nsenter args */
1924 char *dflt_cmd[] = {"login", "-f", "root"};
1925 unsigned n, N, ni = lls_num_inputs(sublpr);
1926 unsigned nea = ni > 1? ni - 1 : ARRAY_SIZE(dflt_cmd); /* num extra args */
1927 const char *arg;
1928 bool success;
1929 int ret, pid;
1930 char *errctx;
1931
1932 ret = lls_check_arg_count(sublpr, 1, INT_MAX, &errctx);
1933 if (ret < 0)
1934 die_lopsub(ret, &errctx);
1935 arg = lls_input(0, sublpr);
1936 if (!is_locked(arg, &pid)) {
1937 ERROR_LOG("container not running: %s\n", arg);
1938 return false;
1939 }
1940 if (!request_init_pid(arg, &pid))
1941 return false;
1942 N = nna + nea + 2; /* +1 for arg to -t and +1 for terminating NULL */
1943 argv = xmalloc(N * sizeof(char *));
1944 for (n = 0; n < nna; n++)
1945 argv[n] = nsenter_args[n];
1946 sprintf(str, "%d", pid);
1947 argv[nna] = str;
1948 for (n = 0; n < nea; n++)
1949 argv[nna + 1 + n] = ni > 1? (char *)lls_input(n + 1, sublpr)
1950 : dflt_cmd[n];
1951 argv[N - 1] = NULL;
1952 clean_env();
1953 success = xexec(argv, NULL);
1954 free(argv);
1955 return success;
1956 }
1957 EXPORT_CMD_HANDLER(enter);
1958
1959 static bool com_log(void)
1960 {
1961 int ret;
1962 char *errctx, *logfile;
1963 bool success, use_less = isatty(STDIN_FILENO) && isatty(STDOUT_FILENO);
1964 char *argv[] = {use_less? "less" : "cat", NULL /* filename */, NULL};
1965
1966 ret = lls_check_arg_count(sublpr, 1, 1, &errctx);
1967 if (ret < 0)
1968 die_lopsub(ret, &errctx);
1969 logfile = get_container_logfile(lls_input(0, sublpr));
1970 argv[1] = logfile;
1971 success = xexec(argv, NULL);
1972 free(logfile);
1973 return success;
1974 }
1975 EXPORT_CMD_HANDLER(log);
1976
1977 int main(int argc, char *argv[])
1978 {
1979 int ret;
1980 char *errctx;
1981 const struct micoforia_user_data *ud;
1982 unsigned num_inputs;
1983
1984 valid_fd012();
1985 parse_options(argc, argv, CMD_PTR(MICOFORIA), &lpr);
1986 loglevel_arg_val = OPT_UINT32_VAL(MICOFORIA, LOGLEVEL);
1987 check_options();
1988 num_inputs = lls_num_inputs(lpr);
1989 ret = lls_lookup_subcmd(argv[argc - num_inputs], micoforia_suite, &errctx);
1990 if (ret < 0)
1991 die_lopsub(ret, &errctx);
1992 subcmd = lls_cmd(ret, micoforia_suite);
1993 parse_options(num_inputs, argv + argc - num_inputs, subcmd, &sublpr);
1994 ud = lls_user_data(subcmd);
1995 exit(ud->handler()? EXIT_SUCCESS : EXIT_FAILURE);
1996 }