playground

Sandbox, container or whatever utilities for linux.
git clone https://pi.duncano.de/git/playground.git
Log | Files | Refs | README

commit b017ceb8851a224c3e791883d8800dba57856a6c
Author: Duncaen <mail@duncano.de>
Date:   Sun, 19 Feb 2017 04:34:23 +0100

initial commit

Diffstat:
.gitignore | 4++++
Makefile | 46++++++++++++++++++++++++++++++++++++++++++++++
README | 31+++++++++++++++++++++++++++++++
libnewns.c | 259+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
libpledge.c | 725+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
newns.1 | 0
newns.2 | 0
newns.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
newns.h | 0
pledge.1 | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pledge.2 | 0
pledge.c | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pledge.h | 1+
13 files changed, 1289 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,4 @@ +*.[ao] +*.so +newns +pledge diff --git a/Makefile b/Makefile @@ -0,0 +1,46 @@ +.error : This Makefile needs GNU make +CFLAGS+=-g -O2 -Wall -Wno-switch -Wextra -fstack-protector-strong -D_FORTIFY_SOURCE=2 + +DESTDIR= +PREFIX=/usr/local +BINDIR=$(PREFIX)/bin +LIBDIR=$(PREFIX)/lib +INCDIR=$(PREFIX)/include +MANDIR=$(PREFIX)/share/man + +PROGS = pledge newns +LIBS = libpledge libnewns +ALL = $(LIBS:=.a) $(LIBS:=.so) $(PROGS) + +all: $(ALL) + +$(PROGS) : % : %.o +$(LIBS:=.a) : %.a : %.o +$(LIBS:=.so) : %.so : %.o + +pledge: libpledge.a +newns: libnewns.a + +pledge: + $(CC) $^ -o $@ $(LDFLAGS) + +ns: + $(CC) $^ -o $@ $(LDFLAGS) + +%.a: + ar rc $@ $^ + +%.so: + +clean: FRC + -rm -f $(ALL) *.o + +install: FRC all + mkdir -p $(DESTDIR)$(BINDIR) \ + $(DESTDIR)$(LIBDIR) \ + $(DESTDIR)$(INCDIR) + install -m0644 libpledge.a libpledge.so $(DESTDIR)$(LIBDIR) + install -m0644 pledge.h $(DESTDIR)$(INCDIR) + install -m0755 pledge $(DESTDIR)$(BINDIR) + +FRC: diff --git a/README b/README @@ -0,0 +1,31 @@ +playground +========== + +Sandbox, container or (whatever you want to call it) utilities for linux. + +There is still a lot to do, `pledge` should already work, but it might be +renamed later to be not confused with a similar api for a different OS. +At the moment `newns` is just an idea with some very basic code that does +not even compile and some docs on how or what it should do. 
+ +Usage +----- + +To just restrict the allowed syscalls: + + $ pledge -p "proc rpath" sh + +To create a new "container" (unshare all possible namespaces) and share the +base filesystem (/{bin,sbin,lib,var,usr,etc}) with it: + + $ newns -f "base container" sh + +Or both together: + + $ newns -f "base container" pledge -p "proc rpath" sh + +Install +------- + + $ make + # make install diff --git a/libnewns.c b/libnewns.c @@ -0,0 +1,259 @@ +#include <stdint.h> + +enum { + NEWNS_EQUAL = 0x000001, + NEWNS_NEW = 0x000002, + NEWNS_NOT = 0x000004, + + NEWNS_BASE = 0x000010. + NEWNS_ROOT = 0x000020, + NEWNS_TMP = 0x000040, + NEWNS_BIN = 0x000080, + NEWNS_ETC = 0x000100, + + NEWNS_DEV = 0x001000, + NEWNS_TMP = 0x002000, + NEWNS_SYS = 0x004000, + NEWNS_RPOC = 0x008000, +}; + +struct namespace { + char *name; + uint64_t clone; + uint64_t flags; +}; + +struct nsmount { + const char *source; + const char *target; + const char *type; + unsigned long flags; + const void *data; +}; + +static struct nsmount *mounts, *tmp, *dev, *sys, *proc, *root; +static char *dir; + +static const struct namespace namespaces[] = { + { "cgroup", CLONE_NEWCGROUP, 0 }, + { "ipc", CLONE_NEWIPC, 0 }, + { "mount", CLONE_NEWNS, 0 }, + { "net", CLONE_NEWNET, 0 }, + { "pid", CLONE_NEWPID, 0 }, + { "user", CLONE_NEWUSER, 0 }, + { "uts", CLONE_NEWUTS, 0 }, + + { "base", CLONE_NEWNS, NEWNS_BASE }, + { "bin", CLONE_NEWNS, NEWNS_BIN }, + { "var", CLONE_NEWNS, NEWNS_VAR }, + { "usr", CLONE_NEWNS, NEWNS_USR }, + { "sys", CLONE_NEWNS, NEWNS_SYS }, + { "home", CLONE_NEWNS, NEWNS_HOME }, + { "tmp", CLONE_NEWNS, NEWNS_TMP }, + { "root", CLONE_NEWUSER, NEWNS_ROOT }, + { "container", CLONE_NEWIPC | CLONE_NEWNS | CLONE_NEWNET | + CLONE_NEWPID | CLONE_NEWUSER | CLONE_NEWUTS, 0 }, + { 0, 0, 0 }, +}; + +/* nsflags are keywords that can be prefixed with a special char + * to change the change the meaning of the flag. + * + * The following special chars are supported: + * - ! 
to remove a flag, if previously defined from `nsfile` or another flag. + * - = might be dropped + * - + might be dropped + * + * There are two types of flags, the namespace flags and filesystem flags. + * The namespace flags are used with `clone(2)`, this part is handled by + * the kernel. Filesystem flags indicate if parts of the filesystem + * should be shared, this can be used to do simple "sandboxing" to just + * share relevant parts of the filesystem. Without filesystem flags or + * only a few that don't define the environment, newns acts more like + * a "container" or chroot tool. + * + * Namespace related flags: + * - cgroup - to create a new cgroup namespace + * - ipc - to create a new ipc namespace + * - mount - to create a new mount namespace + * - net - to create a new net namespace + * - pid - to create a new pid namespace + * - user - to create a new user namespace + * - uts - to create a new uts namespace + * - container - to create all namespaces new + * + * Filesystem related flags: + * - base - implies sys, bin, var, usr and etc flags + * - sys - shares /{tmp,dev,proc,sys} with the host + * - bin - shares /bin and /lib with the host + * - etc - shares /etc with the host + * - var - shares /var with the host + * - usr - shares /usr with the host + * - home - shares /home with the host + * - root - shares everything `/` with the host + * - ro - everything is readonly from inside of the namespace + * - overlay - every change from inside the namespace to the filesystem + * is done in an overlaid directory structure. 
+ */ +static int +nsflags(const char *s, uint64_t *cflags, uint64_t *flags) +{ + const struct namespace *np; + uint64_t flags; + + if (!s || !*s) + return 0; + + if (strchr("!=@", *s)) + switch (*s++) { + case '!': flags |= NEWNS_NOT; break; + case '=': flags |= NEWNS_EQUAL; break; + case '+': flags |= NEWNS_NEW; break; + } + + if (!*s) + return 0; + + for (np = namespaces; *ns->name; ns++) + if (strcmp(*s, ns->name) == 0) + break; + + if (!*ns->name) + return 0; + + *cflags =| ns->clone; + *flags =| ns->flags; + + return 0; +} + +static int +addmount(const char *src, const char *dest, const char *type, + unsigned long flags, const void *data) +{ + struct nsmount *mp; + for (mp = mounts; mp; mp = mp->next) + if (strcmp(dest, mp->dest) == 0) + break; + if (!mp) + if (!(mp = calloc(1, sizeof(struct nsmount)))) + return -1; + mp->source = src; + mp->target = dest; + mp->type = type; + mp->flags = flags; + mp->data = data; + mp->next = 0; + return 0; +} + +static char * +getword(char *s) +{ + char *buf; + return buf; +} + + +/* nsfiles are newline seperated short files, empty lines and + * lines starting with a # are ignored. + * Each line starts with one of the following keywords: + * - mount [proc|tmpfs|sysfs|devpts|devtmpfs] [target] + * - bind source [target] + * - chdir [dir] + * - flags [namespace...] 
+ */ +static int +nsfile(const char *file, uint64_t *cflags, uint64_t *flags) +{ + char *args[4]; + char *arg, *p, *s; + + arg = *args; + p = strchr(s, ' '); + + for (line = ; *line; line++) { + if (!*line || *line = '#') + continue; + if (strncmp(s, "mount", p-s)) { + if (!(*arg++ = getword(s+p+1))) + goto err; + if (strcmp(*args, "proc") != 0 && + strcmp(*args, "tmpfs") != 0 && + strcmp(*args, "sysfs") != 0 && + strcmp(*args, "devpts") != 0 && + strcmp(*args, "devtmpfs") != 0) + return -1; + addmount(args[0], args[1], args[0], 0, 0, 0); + } else if (strncmp(s, "bind", p-s)) { + if (!(*arg++ = getword(s+p+1)) || + !(*args = getword(s+p+1))) + goto err; + addmount(*args, *arg, 0, MS_BIND, 0); + } else if (strncmp(s, "chdir", p-s)) { + if (!(dir = getword(s+p+1))) + goto err; + } else if (strncmp(s, "flags", p-s)) { + for ((p = strtok(p, " ")); p; (p = strtok(0, " "))) { + if (nsflags(p, &cflags, &flags) == -1) { + free(buf); + errno = EINVAL; + return -1; + } + } + } + } + + return 0; +err: + return -1; +} + +int +newns(const char *namespaces, const char *nsfiles[]) +{ + char newpath[PATH_MAX]; + char *buf, *p; + uint64_t cflags, flags; + int i, rv; + + for (p = *nsfile; *p; p++) + if (nsfile(p, &cflags, &flags) == -1) + return -1; + + buf = strdup(namespaces); + for ((p = strtok(buf, " ")); p; (p = strtok(0, " "))) { + if (nsflags(p, &cflags, &flags) == -1) { + free(buf); + errno = EINVAL; + return -1; + } + } + free(buf); + +#if 0 + if (unshare(cflags) == -1) + return -1; +#endif + + + if (!root) { + errno = EINVAL; + return -1; + } + + rv = mount(root->source, root->target, root->type, root->flags, root->data); + if (rv == -1) + return -1; + + struct nsmount *mp; + for (mp = nsmount; nsmount; mp = mp->next) { + snprintf(newpath, "%s/%s", nsdir, mp->target); + if (mkdir(newpath) == -1) + return -1; + if (mount(mp->source, newpath, mp->type, mp->flags, mp->data) == -1) + return -1; + } + + return 0; +} diff --git a/libpledge.c b/libpledge.c @@ -0,0 +1,725 @@ 
+#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdint.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <endian.h> + +#include <asm/bitsperlong.h> /* for __BITS_PER_LONG */ + +#include <sys/prctl.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/socket.h> + +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <linux/audit.h> + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define _LO_ARG(idx) \ + offsetof(struct seccomp_data, args[(idx)]) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define _LO_ARG(idx) \ + offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) +#else +#error "Unknown endianness" +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ENDIAN(_lo, _hi) _lo, _hi +# define _HI_ARG(idx) \ + offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ENDIAN(_lo, _hi) _hi, _lo +# define _HI_ARG(idx) \ + offsetof(struct seccomp_data, args[(idx)]) +#else +# error "Unknown endianness" +#endif + +union arg64 { + struct { + __u32 ENDIAN(lo32, hi32); + }; + __u64 u64; +}; + +#define _LOAD_SYSCALL_NR \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, nr)) + +#define _LOAD_ARCH \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, arch)) + +#define _ARG32(idx) \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_LD+BPF_W+BPF_ABS, _LO_ARG(idx)) + +#define _ARG64(idx) \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_LD+BPF_W+BPF_ABS, _LO_ARG(idx)), \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_ST, 0), \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_LD+BPF_W+BPF_ABS, _HI_ARG(idx)), \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_ST, 1) + +#define _JUMP_EQ(val, jt, jf) \ + *fp++ = (struct sock_filter)BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (val), (jt), (jf)) + +#define _JUMP_EQ64(val, jt, jf) \ 
+ *fp++ = (struct sock_filter)BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ + ((union arg64){.u64 = (val)}).hi32, 0, (jf)), \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_LD+BPF_MEM, 0), \ + *fp++ = (struct sock_filter)BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ + ((union arg64){.u64 = (val)}).lo32, (jt), (jf)) + +#define _JUMP(val) \ + *fp++ = (struct sock_filter)BPF_JUMP(BPF_JMP+BPF_JA, (val), 0xFF, 0xFF) + +#define _RET(x) \ + *fp++ = (struct sock_filter)BPF_STMT(BPF_RET+BPF_K, (x)) + +#define _END \ + len-(fp-fprog->filter)-1 + +enum { + PLEDGED = 0x100000, + PLEDGE_ALWAYS = 0xffffff, + PLEDGE_IOCTL = 0x010001, + PLEDGE_RPATH = 0x000002, + PLEDGE_WPATH = 0x000004, + PLEDGE_CPATH = 0x000008, + PLEDGE_STDIO = 0x000010, + PLEDGE_CHOWN = 0x000020, + PLEDGE_DPATH = 0x000040, + PLEDGE_DRM = 0x000080, + PLEDGE_EXEC = 0x000100, + PLEDGE_FATTR = 0x000200, + PLEDGE_FLOCK = 0x000400, + PLEDGE_GETPW = 0x000800, + PLEDGE_INET = 0x001000, + PLEDGE_PROC = 0x002000, + PLEDGE_ID = 0x004000, + PLEDGE_SETTIME = 0x008000, + PLEDGE_UNIX = 0x008000, + PLEDGE_CHOWNUID = 0x010000, +}; + +struct promise { + char *name; + uint64_t flags; +}; + +static const struct promise strpromises[] = { + { "chown", PLEDGE_CHOWN | PLEDGE_CHOWNUID }, + { "cpath", PLEDGE_CPATH }, + { "dpath", PLEDGE_DPATH }, + { "drm", PLEDGE_DRM }, + { "exec", PLEDGE_EXEC }, + { "fattr", PLEDGE_FATTR | PLEDGE_CHOWN }, + { "flock", PLEDGE_FLOCK }, + { "getpw", PLEDGE_GETPW }, + { "id", PLEDGE_ID }, + { "inet", PLEDGE_INET }, + { "ioctl", PLEDGE_IOCTL }, + { "proc", PLEDGE_PROC }, + { "rpath", PLEDGE_RPATH }, + { "settime", PLEDGE_SETTIME }, + { "stdio", PLEDGE_STDIO }, + { "unix", PLEDGE_UNIX }, + { "wpath", PLEDGE_WPATH }, + { 0, 0 }, +}; + +const uint64_t pledge_syscalls[] = { + /**/ + [SYS_exit] = PLEDGE_ALWAYS, + [SYS_exit_group] = PLEDGE_ALWAYS, + [SYS_seccomp] = PLEDGE_ALWAYS, + [SYS_prctl] = PLEDGE_ALWAYS | PLEDGE_PROC, + + [SYS_getuid] = PLEDGE_STDIO, + [SYS_geteuid] = PLEDGE_STDIO, + [SYS_getresuid] = PLEDGE_STDIO, + [SYS_getgid] = 
PLEDGE_STDIO, + [SYS_getegid] = PLEDGE_STDIO, + [SYS_getresgid] = PLEDGE_STDIO, + [SYS_getgroups] = PLEDGE_STDIO, + [SYS_getpgrp] = PLEDGE_STDIO, + [SYS_getpgid] = PLEDGE_STDIO, + [SYS_getppid] = PLEDGE_STDIO, + [SYS_getsid] = PLEDGE_STDIO, + [SYS_getrlimit] = PLEDGE_STDIO, + [SYS_gettimeofday] = PLEDGE_STDIO, + [SYS_getrusage] = PLEDGE_STDIO, + [SYS_clock_getres] = PLEDGE_STDIO, + [SYS_clock_gettime] = PLEDGE_STDIO, + [SYS_getpid] = PLEDGE_STDIO, + [SYS_uname] = PLEDGE_STDIO, + [SYS_sysinfo] = PLEDGE_STDIO, + [SYS_madvise] = PLEDGE_STDIO, +#if defined(SYS_fadvise64) && SYS_fadvise64 != SYS_fadvise + [SYS_fadvise64] = PLEDGE_STDIO, +#endif + [SYS_mmap] = PLEDGE_STDIO, +#if defined(SYS_mmap2) + [SYS_mmap2] = PLEDGE_STDIO, +#endif + [SYS_mprotect] = PLEDGE_STDIO, + [SYS_munmap] = PLEDGE_STDIO, + [SYS_msync] = PLEDGE_STDIO, + [SYS_brk] = PLEDGE_STDIO, + [SYS_umask] = PLEDGE_STDIO, + [SYS_read] = PLEDGE_STDIO, +#if defined(SYS_read64) && SYS_read64 != SYS_read + [SYS_read64] = PLEDGE_STDIO, +#endif + [SYS_readv] = PLEDGE_STDIO, +#if defined(SYS_pread64) && SYS_pread64 != SYS_pread + [SYS_pread64] = PLEDGE_STDIO, +#endif + [SYS_preadv] = PLEDGE_STDIO, + [SYS_write] = PLEDGE_STDIO, +#if defined(SYS_write64) && SYS_write64 != SYS_write + [SYS_write64] = PLEDGE_STDIO, +#endif +#if defined(SYS_pwrite64) && SYS_pwrite64 != SYS_pwrite + [SYS_pwrite64] = PLEDGE_STDIO, +#endif + [SYS_writev] = PLEDGE_STDIO, + [SYS_pwritev] = PLEDGE_STDIO, + [SYS_recvmsg] = PLEDGE_STDIO, + [SYS_recvfrom] = PLEDGE_STDIO, + [SYS_ftruncate] = PLEDGE_STDIO, + [SYS_futex] = PLEDGE_STDIO, + [SYS_lseek] = PLEDGE_STDIO, + [SYS_sendto] = PLEDGE_STDIO, + [SYS_sendmsg] = PLEDGE_STDIO, + [SYS_nanosleep] = PLEDGE_STDIO, + [SYS_sigaltstack] = PLEDGE_STDIO, + [SYS_rt_sigprocmask] = PLEDGE_STDIO, + [SYS_rt_sigsuspend] = PLEDGE_STDIO, + [SYS_rt_sigaction] = PLEDGE_STDIO, + [SYS_rt_sigreturn] = PLEDGE_STDIO, + [SYS_rt_sigpending] = PLEDGE_STDIO, + [SYS_getitimer] = PLEDGE_STDIO, + [SYS_setitimer] = PLEDGE_STDIO, 
+ [SYS_alarm] = PLEDGE_STDIO, + [SYS_poll] = PLEDGE_STDIO, + [SYS_ppoll] = PLEDGE_STDIO, + [SYS_eventfd] = PLEDGE_STDIO, + [SYS_epoll_create] = PLEDGE_STDIO, + [SYS_epoll_create1] = PLEDGE_STDIO, + [SYS_epoll_ctl] = PLEDGE_STDIO, + [SYS_epoll_ctl_old] = PLEDGE_STDIO, + [SYS_epoll_pwait] = PLEDGE_STDIO, + [SYS_epoll_wait] = PLEDGE_STDIO, + [SYS_epoll_wait_old] = PLEDGE_STDIO, + [SYS_select] = PLEDGE_STDIO, + [SYS_pselect6] = PLEDGE_STDIO, + [SYS_fstat] = PLEDGE_STDIO, + [SYS_fsync] = PLEDGE_STDIO, + [SYS_setsockopt] = PLEDGE_STDIO, + [SYS_getsockopt] = PLEDGE_STDIO, + [SYS_fcntl] = PLEDGE_STDIO, + [SYS_close] = PLEDGE_STDIO, + [SYS_tee] = PLEDGE_STDIO, + [SYS_splice] = PLEDGE_STDIO, + [SYS_dup] = PLEDGE_STDIO, + [SYS_dup2] = PLEDGE_STDIO, + [SYS_dup3] = PLEDGE_STDIO, + [SYS_shutdown] = PLEDGE_STDIO, + [SYS_fchdir] = PLEDGE_STDIO, + [SYS_pipe] = PLEDGE_STDIO, + [SYS_pipe2] = PLEDGE_STDIO, + [SYS_socketpair] = PLEDGE_STDIO, + [SYS_wait4] = PLEDGE_STDIO, + [SYS_kill] = PLEDGE_STDIO, + [SYS_ioctl] = PLEDGE_STDIO, + [SYS_open] = PLEDGE_STDIO, + [SYS_stat] = PLEDGE_STDIO, +#if defined(SYS_stat64) && SYS_stat64 != SYS_stat + [SYS_stat64] = PLEDGE_STDIO, +#endif + [SYS_access] = PLEDGE_STDIO, + [SYS_readlink] = PLEDGE_STDIO, + + [SYS_settimeofday] = PLEDGE_SETTIME, + + [SYS_chdir] = PLEDGE_RPATH, + [SYS_openat] = PLEDGE_RPATH | PLEDGE_WPATH, + [SYS_newfstatat] = PLEDGE_RPATH | PLEDGE_WPATH, + [SYS_faccessat] = PLEDGE_RPATH | PLEDGE_WPATH, + [SYS_getcwd] = PLEDGE_RPATH | PLEDGE_WPATH, + [SYS_readlinkat] = PLEDGE_RPATH | PLEDGE_WPATH, + [SYS_lstat] = PLEDGE_RPATH | PLEDGE_WPATH, +#if defined(SYS_lstat64) && SYS_lstat64 != SYS_lstat + [SYS_lstat64] = PLEDGE_STDIO, +#endif + [SYS_truncate] = PLEDGE_WPATH, +#if defined(SYS_truncate64) && SYS_truncate64 != SYS_truncate + [SYS_truncate64] = PLEDGE_STDIO, +#endif + [SYS_rename] = PLEDGE_RPATH | PLEDGE_CPATH, + [SYS_rmdir] = PLEDGE_CPATH, + [SYS_renameat] = PLEDGE_CPATH, + [SYS_renameat2] = PLEDGE_CPATH, + [SYS_link] = PLEDGE_CPATH, 
+ [SYS_linkat] = PLEDGE_CPATH, + [SYS_lremovexattr] = PLEDGE_CPATH, + [SYS_lsetxattr] = PLEDGE_CPATH, + [SYS_symlink] = PLEDGE_CPATH, + [SYS_unlink] = PLEDGE_CPATH, + [SYS_unlinkat] = PLEDGE_CPATH, + [SYS_mkdir] = PLEDGE_CPATH, + [SYS_mkdirat] = PLEDGE_CPATH, + + [SYS_getdents] = PLEDGE_RPATH, +#if defined(SYS_getdents64) && SYS_getdents64 != SYS_getdents + [SYS_getdents64] = PLEDGE_RPATH, +#endif + [SYS_statfs] = PLEDGE_RPATH, + [SYS_fstatfs] = PLEDGE_RPATH, + [SYS_listxattr] = PLEDGE_RPATH, + [SYS_llistxattr] = PLEDGE_RPATH, + + [SYS_utimes] = PLEDGE_FATTR, + [SYS_utimensat] = PLEDGE_FATTR, + [SYS_chmod] = PLEDGE_FATTR, + [SYS_fchmod] = PLEDGE_FATTR, + [SYS_fchmodat] = PLEDGE_FATTR, + + [SYS_chown] = PLEDGE_CHOWN, + [SYS_fchownat] = PLEDGE_CHOWN, + [SYS_lchown] = PLEDGE_CHOWN, + [SYS_fchown] = PLEDGE_CHOWN, + + [SYS_clone] = PLEDGE_PROC, + [SYS_fork] = PLEDGE_PROC, + [SYS_vfork] = PLEDGE_PROC, + [SYS_unshare] = PLEDGE_PROC, + [SYS_setpgid] = PLEDGE_PROC, + [SYS_setsid] = PLEDGE_PROC, + [SYS_set_tid_address] = PLEDGE_PROC, + [SYS_set_robust_list] = PLEDGE_PROC, + [SYS_get_robust_list] = PLEDGE_PROC, + + [SYS_setrlimit] = PLEDGE_PROC | PLEDGE_ID, + [SYS_prlimit64] = PLEDGE_PROC | PLEDGE_ID, + [SYS_getpriority] = PLEDGE_PROC | PLEDGE_ID, + [SYS_setpriority] = PLEDGE_PROC | PLEDGE_ID, + + [SYS_setuid] = PLEDGE_ID, + [SYS_setreuid] = PLEDGE_ID, + [SYS_setresuid] = PLEDGE_ID, + [SYS_setgid] = PLEDGE_ID, + [SYS_setregid] = PLEDGE_ID, + [SYS_setresgid] = PLEDGE_ID, + [SYS_setgroups] = PLEDGE_ID, + + [SYS_execve] = PLEDGE_EXEC, + [SYS_arch_prctl] = PLEDGE_EXEC, + + [SYS_socket] = PLEDGE_INET | PLEDGE_UNIX, + [SYS_connect] = PLEDGE_INET | PLEDGE_UNIX, + [SYS_bind] = PLEDGE_INET | PLEDGE_UNIX, + [SYS_getsockname] = PLEDGE_INET | PLEDGE_UNIX, + + [SYS_listen] = PLEDGE_INET | PLEDGE_UNIX, + [SYS_accept4] = PLEDGE_INET | PLEDGE_UNIX, + [SYS_accept] = PLEDGE_INET | PLEDGE_UNIX, + [SYS_getpeername] = PLEDGE_INET | PLEDGE_UNIX, + + [SYS_flock] = PLEDGE_FLOCK, +}; + +static struct 
sock_fprog * +pledge_whitelist(uint64_t flags) +{ + uint64_t len, num, i; + uint64_t calls[nitems(pledge_syscalls)]; + struct sock_fprog *fprog; + struct sock_filter *fp; + + num = 0; + + for (i = 0; i < nitems(pledge_syscalls); i++) { + if (!(flags & pledge_syscalls[i])) + continue; + calls[num++] = i; +#ifdef TEST + fprintf(stderr, "whitelist syscall %ld\n", i); +#endif + } + + /* space for all syscall comparisons */ + len = num; + /* space arch validation, syscall load and and two return statements */ + len += 5; + + if (!(fprog = calloc(1, sizeof(struct sock_fprog)))) + return 0; + if (!(fprog->filter = calloc(len, sizeof(struct sock_filter)))) { + free(fprog); + return 0; + } + fprog->len = len; + fp = fprog->filter; + + /* validate architecture, jump to the RET_KILL if not equal */ + _LOAD_ARCH; + _JUMP_EQ(AUDIT_ARCH_X86_64, 0, _END-1); + /* compare syscall numbers */ + _LOAD_SYSCALL_NR; + for (i = 0; i < num; i++) + _JUMP_EQ(calls[i], _END, 0); + /* no match */ + _RET(SECCOMP_RET_KILL); + /* matching syscall jump here */ + _RET(SECCOMP_RET_ALLOW); + + return fprog; +} + +static struct sock_fprog * +pledge_blacklist(uint64_t flags, uint64_t oldflags) +{ + uint64_t len, num, i; + uint64_t calls[nitems(pledge_syscalls)]; + struct sock_fprog *fprog; + struct sock_filter *fp; + + num = 0; + + for (i = 0; i < nitems(pledge_syscalls); i++) { + if (!pledge_syscalls[i]) + continue; + if ((flags & pledge_syscalls[i]) || !(oldflags & pledge_syscalls[i])) + continue; + calls[num++] = i; +#ifdef TEST + fprintf(stderr, "blacklist syscall %ld\n", i); +#endif + } + + /* no new rules to apply */ + if (!num) + return 0; + + /* space for all syscall comparisons */ + len = num; + /* syscall load and and two return statements */ + len += 3; + + if (!(fprog = calloc(1, sizeof(struct sock_fprog)))) + return 0; + if (!(fprog->filter = calloc(len, sizeof(struct sock_filter)))) { + free(fprog); + return 0; + } + fprog->len = len; + fp = fprog->filter; + + /* compare all syscall 
numbers */ + _LOAD_SYSCALL_NR; + for (i = 0; i < num; i++) + _JUMP_EQ(calls[i], _END, 0); + /* no match */ + _RET(SECCOMP_RET_ALLOW); + /* matching syscall jump here */ + _RET(SECCOMP_RET_KILL); + + return fprog; +} + +static struct sock_fprog * +pledge_filter(uint64_t flags, uint64_t oldflags) +{ + struct sock_fprog *fprog; + struct sock_filter *fp; + uint64_t len; + int allow_prctl, allow_socket, allow_selfkill, allow_fcntl, allow_selfchown, allow_ioctl; + + len = 0; + allow_selfchown = (!(flags & PLEDGE_CHOWNUID) && (flags & PLEDGE_CHOWN)) || 0; + allow_prctl = !(flags & PLEDGE_PROC) || 0; + allow_socket = (flags & PLEDGE_INET) || (flags & PLEDGE_UNIX) || 0; + allow_selfkill = (!(flags & PLEDGE_PROC)) || 0; + allow_fcntl = (!(flags & PLEDGE_PROC) && (flags & PLEDGE_STDIO)) || 0; + allow_ioctl = (!(flags & PLEDGE_IOCTL)) || 0; + + /* chown(2), fchown(2), lchown(2), fchownat(2) */ + if (allow_selfchown) + len += 32; + + if (allow_prctl) + len += 4; + + if (allow_socket) + len += 3; + + /* AF_INET[6]? 
*/ + if ((flags&PLEDGE_INET)) + len += 2; + + /* AF_UNIX */ + if ((flags&PLEDGE_UNIX)) + len += 1; + + if (allow_selfkill) + len += 11; + + if (allow_fcntl) + len += 3; + + if (allow_ioctl) + len += 6; + + /* no new filters */ + if (!len) + return 0; + + /* space for 3 different return statements (KILL,ALLOW,EPERM) */ + len += 3; + +#ifdef TEST + printf("allowsocket %d unix=%d inet=%d\n", allow_socket, + ((flags&PLEDGE_UNIX) == PLEDGE_UNIX), + ((flags&PLEDGE_INET) == PLEDGE_INET)); + printf("allowselfchown %d\n", allow_selfchown); + printf("allowprctl %d\n", allow_prctl); + printf("allowselfkill %d\n", allow_selfkill); + printf("allowfcntl %d\n", allow_fcntl); + printf("allowbasicioctl %d\n", allow_ioctl); +#endif + + if (!(fprog = calloc(1, sizeof(struct sock_fprog)))) + return 0; + if (!(fprog->filter = calloc(len, sizeof(struct sock_filter)))) { + free(fprog); + return 0; + } + fprog->len = len; + fp = fprog->filter; + +#define _KILL _END +#define _EPERM _END-1 +#define _ALLOW _END-2 + + if (allow_selfchown) { + uid_t uid = getuid(); + gid_t gid = getgid(); + + /* chown(2), fchown(2), lchown(2) */ + _JUMP_EQ(SYS_chown, 3, 0); + _JUMP_EQ(SYS_fchown, 2, 0); + _JUMP_EQ(SYS_lchown, 0, 14); // XXX: fix offset + _ARG64(1); // +4 + _JUMP_EQ64(uid, 0, _EPERM); // +3 + _ARG64(2); // + 4 + _JUMP_EQ64(gid, _ALLOW, _EPERM); // +3 + + /* fchownat(2) */ + _JUMP_EQ(SYS_fchownat, 0, 14); // XXX: fix offset + _ARG64(2); // +4 + _JUMP_EQ64(uid, 0, _EPERM); // +3 + _ARG64(4); // + 4 + _JUMP_EQ64(gid, _ALLOW, _EPERM); // +3 + } + + if (allow_prctl) { + /* allow prctl(PR_[SG]ET_SECCOMP, ...) */ + _JUMP_EQ(SYS_prctl, 0, 3); + _ARG32(0); + _JUMP_EQ(PR_SET_SECCOMP, _ALLOW, 0); + _JUMP_EQ(PR_GET_SECCOMP, _ALLOW, _KILL); + } + + if (allow_socket) { + /* allow specific domains: socket(domain, .., ..) */ + _JUMP_EQ(SYS_socket, 0, 2 + ((flags & PLEDGE_INET) ? 2 : 0) + ((flags & PLEDGE_UNIX) ? 
1 : 0)); + _ARG32(0); + if (flags & PLEDGE_INET) { + _JUMP_EQ(AF_INET, _ALLOW, 0); + _JUMP_EQ(AF_INET6, _ALLOW, 0); + } + if (flags & PLEDGE_UNIX) { + _JUMP_EQ(AF_UNIX, _ALLOW, 0); + } + _JUMP(_EPERM); + } + + if (allow_fcntl) { + /* allow fcntl(..., != F_SETOWN, ...) */ + _JUMP_EQ(SYS_fcntl, 0, 2); + _ARG32(1); + _JUMP_EQ(F_SETOWN, _EPERM, _ALLOW); + } + + if (allow_selfkill) { + pid_t pid = getpid(); + /* allow kill(0 | getpid(), ...) */ + _JUMP_EQ(SYS_kill, 0, 10); // XXX: fix offset + _ARG64(0); // +4 + _JUMP_EQ64(0, _ALLOW, 0); // +3 + _JUMP_EQ64(pid, _ALLOW, _EPERM); // +3 + } + + if (allow_ioctl) { + /* allow ioctl(..., FIONREAD|FIONBIO|FIOCLEX|FIONCLEX, ...) */ + _JUMP_EQ(SYS_kill, 0, 5); + _ARG32(1); + _JUMP_EQ(FIONREAD, _ALLOW, 0); + _JUMP_EQ(FIONBIO, _ALLOW, 0); + _JUMP_EQ(FIOCLEX, _ALLOW, 0); + _JUMP_EQ(FIONCLEX, _ALLOW, _KILL); + } + + /* no match */ + _RET(SECCOMP_RET_ALLOW); + /* no permissions */ + _RET(SECCOMP_RET_ERRNO|(EPERM & SECCOMP_RET_DATA)); + /* matching syscall jump here */ + _RET(SECCOMP_RET_KILL); + +#if TEST + printf("length=%ld expected=%ld\n", (fp-fprog->filter), len); +#endif + + return fprog; +} + +static uint64_t currflags = 0; + +/* + * pledge() makes use of seccomp layering, the first pledge call creates + * a whitelist white allowed systemcalls and if necessary a second layer + * with filters that look at arguments of systemcalls. + * further pledge() calls blacklist systemcalls that are not part of + * the new promises and adds the filter layaer if necessary. + * The BPF filters are as small as possible and never blacklist syscalls + * twice and never blacklists syscalls that were not initially whitelisted. + * + * There are some differences to the OpenBSD `pledge(2)` syscall. + * The OpenBSD implementation drops filters if `execve(2)` is called, this + * is not possible at this time with `seccomp(2)`. 
+ * Furthermore in OpenBSDs implementation it is possible to use syscalls + * that operate in specific paths like /tmp without priviously promising it. + * The `paths` argument for `pledge(2)` from OpenBSDs pledge is deprecated + * and `pledge(2)` returns `EINVAL` if its not `NULL` this api does the same. + */ +int +pledge(const char *promises, const char *paths[]) +{ + const struct promise *pp; + struct sock_fprog *filterprog; + uint64_t flags, f; + int rv = 0; + char *buf, *p; + +#if TEST + printf("pledge(\"%s\", 0)\n", promises); +#endif + + if (paths) { + errno = EINVAL; + return -1; + } + + if (!promises) + return 0; + + flags = 0; + buf = strdup(promises); + for ((p = strtok(buf, " ")); p; (p = strtok(0, " "))) { + f = 0; + for (pp = strpromises; pp->name; pp++) { + if (strcmp(p, pp->name) == 0) + f = pp->flags; + } + if (!f) { + free(buf); + errno = EINVAL; + return -1; + } + flags |= f; + } + free(buf); + + if ((currflags & PLEDGED) != PLEDGED) { + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) + return -1; + filterprog = pledge_whitelist(flags); + } else { + filterprog = pledge_blacklist(flags, currflags); + } + + if (filterprog) { + if ((rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filterprog)) == -1) + goto ret; + free(filterprog); + } + + if ((filterprog = pledge_filter(flags, currflags))) + if ((rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filterprog)) == -1) + goto ret; + + currflags = flags | PLEDGED; + +ret: + free(filterprog); + return rv; +} + +#ifdef TEST +int +main(int argc, char *argv[]) +{ + if (pledge("stdio chown fattr cpath proc id", 0) == -1) { + fprintf(stderr, "error: pledge\n"); + exit(1); + } + + if (argc == 2) { + if (pledge("stdio", 0) == -1) { + fprintf(stderr, "error: pledge\n"); + exit(1); + } + printf("block chown\n"); + chown("./test", 1000, 1000); + } else if (argc == 3) { + printf("allow unlink\n"); + unlink("./test"); + } else if (argc == 4) { + if (pledge("stdio", 0) == -1) { + fprintf(stderr, "error: pledge\n"); + 
exit(1); + } + printf("block unlink\n"); + unlink("./test"); + } else if (argc == 5) { +#ifdef getentropy + printf("block getrandom\n"); + char buf[128]; + getentropy(buf, sizeof buf); +#endif + } else if (argc == 6) { + if (pledge("stdio foo", 0) == -1) { + fprintf(stderr, "error: pledge\n"); + exit(1); + } + } else if (argc == 7) { + fprintf(stderr, "test chown(.., 1001, 1001)\n"); + chown("./test", 1001, 1001); + } else { + printf("allow\n"); + } + return 0; +} +#endif diff --git a/newns.1 b/newns.1 diff --git a/newns.2 b/newns.2 diff --git a/newns.c b/newns.c @@ -0,0 +1,82 @@ +#define _GNU_SOURCE /* for CLONE_* */ +#include <err.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +static char *argv0; + +static void +usage() +{ + fprintf(stderr, "usage: %s [-c dir] [-b new:old] [-n namespace] [command] [args]\n", argv0); + exit(1); +} + +static int +write_map(const char *file, unsigned int start, unsigned int end) +{ + char buf[32]; + int fd, rv; + rv = 0; + if ((fd = open(file, O_RDWR)) == -1) + return 1; + if (write(fd, buf, snprintf(buf, sizeof buf, "%u %u 1", start, end)) == -1) + rv = 1; + close(fd); + return rv; +} + +int +main(int argc, char **argv) +{ + char opt; + char *nsfile, *dir; + char *defargv[] = { "/bin/sh", 0 }; + uid_t uid; + gid_t gid; + int fd; + + argv0 = *argv; + nsfile = 0; + + while ((opt = getopt(argc, argv, "+cbno")) != -1) + switch (opt) { + case 'c': dir = optarg; break; + case 'b': /* add_bind(optarg); */; break; + case 'n': nsfile = optarg; break; + case 'o': /* add_overlay(optarg); */; break; + default: usage(); + } + + argc -= optind; + argv += optind; + if (!argc) + argv = defargv; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER|CLONE_NEWNS) == -1) + err(1, "unshare"); + + if ((fd = open("/proc/self/setgroups", O_RDWR)) != -1) { + if (write(fd, "deny", 4) == -1) + err(1, "write /proc/self/setgroups"); + close(fd); + } + + if 
(write_map("/proc/self/uid_map", uid, uid)) + err(1, "write /proc/self/uid_map"); + if (write_map("/proc/self/gid_map", gid, gid)) + err(1, "write /proc/self/gid_map"); + + if ((dir && chdir(dir) == -1) || chdir("/") == -1) + err(1, "chdir"); + + + execvp(*argv, argv); + err(1, "exec: %s", *argv); +} diff --git a/newns.h b/newns.h diff --git a/pledge.1 b/pledge.1 @@ -0,0 +1,80 @@ +.Dd July 22, 2017 +.Dt PLEDGE 1 +.Os +.Sh NAME +.Nm pledge +.Nd execute commands with restricted syscalls +.Sh SYNOPSIS +.Nm +.Op Fl p Ar promises +.Ar command +.Op Ar args\ ... +.Sh DESCRIPTION +The +.Nm +utility executes the given +.Ar command +with restricted access to syscalls using +.Xr seccomp 2 . +The +.Ar promises +argument specifies the groups of syscalls the command is allowed to used. +If the command uses a syscall from a group that is not promised it is killed +by a +.Dv SIGSYS +signal. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl p Ar promises +A space separated list of promises. +See +.Sx PROMISES +for a complete list of available promises. +Default: +.Sq Li "exec stdio" . +.El +.Sh PROMISES +See +.Xr pledge 2 +for a more complete description of each promise. +.Bl -tag -width Ds +.It Ar stdio +Allows most basic syscalls. +.It Ar rpath +Read-only operations on the filesystem. +.It Ar wpath +Write operations on the filesystem. +.It Ar cpath +Allows the creation of new files and directories. +.It Ar inet +.Dv AF_INET +and +.Dv AF_INET6 +sockets. +.It Ar fattr +Change file attributes. +.It Ar chmod +Change file modes. +.It Ar flock +File locking. +.It Ar unix +.Dv AF_UNIX +sockets. +.It Ar proc +Process relationship operations. +.It Ar exec +Start new processes. +.It Ar id +Syscalls that can change the rights of a process. 
+.El +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr pledge 2 , +.Xr seccomp 2 , +.Xr syscalls 2 +.Sh AUTHORS +.An Duncan Overbruck Aq Mt mail@duncano.de +.Sh LICENSE +TBA diff --git a/pledge.2 b/pledge.2 diff --git a/pledge.c b/pledge.c @@ -0,0 +1,61 @@ +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/types.h> +#include <sys/wait.h> + +#include "pledge.h" + +static char *argv0; + +static void +usage() +{ + fprintf(stderr, "usage: %s [-p promises] command [args]\n", argv0); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + char promises[128]; + char *p, *n; + ssize_t len; + int c; + + len = sizeof promises - 1; + argv0 = *argv; + + memset(promises, 0, sizeof promises); + strcpy(promises, "exec stdio"); + p = promises+strlen(promises); + + while((c = getopt(argc, argv, "+p:")) != -1) + switch (c) { + case 'p': + n = p+strlen(optarg)+1; + if (n-promises >= len) + errx(1, "promises: too long"); + *p++ = ' '; + memcpy(p, optarg, n-p); + p = n; + break; + default: usage(); + } + + argc -= optind; + argv += optind; + + if (!argc) + usage(); + + if (pledge(promises, 0) != 0) + err(1, "%s", promises); + + execvp(*argv, argv); + err(1, "exec: %s", *argv); +} diff --git a/pledge.h b/pledge.h @@ -0,0 +1 @@ +int pledge(const char *, const char *[]);