diff --git a/include/linux/capability.h b/include/linux/capability.h index a1d93da67fe..ffe7bab8c3a 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -152,7 +152,9 @@ typedef struct kernel_cap_struct { * Transfer any capability in your permitted set to any pid, * remove any capability in your permitted set from any pid * With VFS support for capabilities (neither of above, but) - * Add any capability to the current process' inheritable set + * Add any capability from current's capability bounding set + * to the current process' inheritable set + * Allow taking bits out of capability bounding set */ #define CAP_SETPCAP 8 @@ -202,7 +204,6 @@ typedef struct kernel_cap_struct { #define CAP_IPC_OWNER 15 /* Insert and remove kernel modules - modify kernel without limit */ -/* Modify cap_bset */ #define CAP_SYS_MODULE 16 /* Allow ioperm/iopl access */ @@ -314,6 +315,10 @@ typedef struct kernel_cap_struct { #define CAP_SETFCAP 31 +#define CAP_LAST_CAP CAP_SETFCAP + +#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) + /* * Bit location of each capability (used by user-space library and kernel) */ @@ -465,6 +470,8 @@ extern const kernel_cap_t __cap_init_eff_set; int capable(int cap); int __capable(struct task_struct *t, int cap); +extern long cap_prctl_drop(unsigned long cap); + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f42663eaf65..1f74e1d7415 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,6 +121,18 @@ extern struct group_info init_groups; #else #define INIT_IDS #endif + +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +/* + * Because of the reduced scope of CAP_SETPCAP when filesystem + * capabilities are in effect, it is safe to allow CAP_SETPCAP to + * be available in the default configuration. + */ +# define CAP_INIT_BSET CAP_FULL_SET +#else +# define CAP_INIT_BSET CAP_INIT_EFF_SET +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -156,6 +168,7 @@ extern struct group_info init_groups; .cap_effective = CAP_INIT_EFF_SET, \ .cap_inheritable = CAP_INIT_INH_SET, \ .cap_permitted = CAP_FULL_SET, \ + .cap_bset = CAP_INIT_BSET, \ .keep_capabilities = 0, \ .user = INIT_USER, \ .comm = "swapper", \ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index e2eff9079fe..3800639775a 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -63,4 +63,8 @@ #define PR_GET_SECCOMP 21 #define PR_SET_SECCOMP 22 +/* Get/set the capability bounding set */ +#define PR_CAPBSET_READ 23 +#define PR_CAPBSET_DROP 24 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c30d174a02f..9c13be3a21e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1098,7 +1098,7 @@ struct task_struct { uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted; + kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; unsigned keep_capabilities:1; struct user_struct *user; #ifdef CONFIG_KEYS diff --git a/include/linux/security.h b/include/linux/security.h index 9d289e726fd..fe52cdeab0a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -40,11 +40,6 @@ #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 -/* - * Bounding set - */ -extern kernel_cap_t cap_bset; - extern unsigned securebits; struct ctl_table; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index bf4ae4e138f..571f01d20a8 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -102,7 +102,6 @@ enum KERN_NODENAME=7, KERN_DOMAINNAME=8, - KERN_CAP_BSET=14, /* int: capability bounding set */ KERN_PANIC=15, /* int: panic timeout */ KERN_REALROOTDEV=16, /* real root device to mount after initrd */ @@ -965,8 +964,6 @@ extern int proc_dostring(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -extern int proc_dointvec_bset(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, diff --git a/kernel/fork.c b/kernel/fork.c index 1160f87ba70..2b55b74cd99 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1118,6 +1118,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_SECURITY p->security = NULL; #endif + p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); diff --git a/kernel/sys.c b/kernel/sys.c index d1fe71eb454..4162d12390b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1637,7 +1637,7 @@ asmlinkage long sys_umask(int mask) mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); return mask; } - + asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { @@ -1742,6 +1742,17 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = prctl_set_seccomp(arg2); break; + case PR_CAPBSET_READ: + if (!cap_valid(arg2)) + return -EINVAL; + return !!cap_raised(current->cap_bset, arg2); + case PR_CAPBSET_DROP: +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES + return cap_prctl_drop(arg2); +#else + return -EINVAL; +#endif + default: error = -EINVAL; break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d0b47b85906..5e2ad5bf88e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -419,15 +419,6 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_SECURITY_CAPABILITIES - { - .procname = "cap-bound", - .data = &cap_bset, - .maxlen = sizeof(kernel_cap_t), - .mode = 0600, - .proc_handler = &proc_dointvec_bset, - }, -#endif /* def CONFIG_SECURITY_CAPABILITIES */ #ifdef CONFIG_BLK_DEV_INITRD { .ctl_name = KERN_REALROOTDEV, @@ -2096,26 +2087,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, return 0; } -#ifdef CONFIG_SECURITY_CAPABILITIES -/* - * init may raise the set. - */ - -int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int op; - - if (write && !capable(CAP_SYS_MODULE)) { - return -EPERM; - } - - op = is_global_init(current) ? OP_SET : OP_AND; - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, - do_proc_dointvec_bset_conv,&op); -} -#endif /* def CONFIG_SECURITY_CAPABILITIES */ - /* * Taint values can only be increased */ @@ -2529,12 +2500,6 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, return -ENOSYS; } -int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c3206fa5004..006365b69ea 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -37,10 +37,6 @@ static struct trans_ctl_table trans_kern_table[] = { { KERN_NODENAME, "hostname" }, { KERN_DOMAINNAME, "domainname" }, -#ifdef CONFIG_SECURITY_CAPABILITIES - { KERN_CAP_BSET, "cap-bound" }, -#endif /* def CONFIG_SECURITY_CAPABILITIES */ - { KERN_PANIC, "panic" }, { KERN_REALROOTDEV, "real-root-dev" }, @@ -1498,9 +1494,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) (table->strategy == sysctl_ms_jiffies) || (table->proc_handler == proc_dostring) || (table->proc_handler == proc_dointvec) || -#ifdef CONFIG_SECURITY_CAPABILITIES - (table->proc_handler == proc_dointvec_bset) || -#endif /* def CONFIG_SECURITY_CAPABILITIES */ (table->proc_handler == proc_dointvec_minmax) || (table->proc_handler == proc_dointvec_jiffies) || (table->proc_handler == proc_dointvec_userhz_jiffies) || diff --git a/security/commoncap.c b/security/commoncap.c index 01ab47845dc..5aba82679a0 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -25,20 +25,6 @@ #include #include -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow this capability to - * be available in the default configuration. - */ -# define CAP_INIT_BSET CAP_FULL_SET -#else /* ie. ndef CONFIG_SECURITY_FILE_CAPABILITIES */ -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ - -kernel_cap_t cap_bset = CAP_INIT_BSET; /* systemwide capability bound */ -EXPORT_SYMBOL(cap_bset); - /* Global security state */ unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ @@ -140,6 +126,12 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, /* incapable of using this inheritable set */ return -EPERM; } + if (!cap_issubset(*inheritable, + cap_combine(target->cap_inheritable, + current->cap_bset))) { + /* no new pI capabilities outside bounding set */ + return -EPERM; + } /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, @@ -337,10 +329,11 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); - working = cap_intersect (bprm->cap_inheritable, + new_permitted = cap_intersect(bprm->cap_permitted, + current->cap_bset); + working = cap_intersect(bprm->cap_inheritable, current->cap_inheritable); - new_permitted = cap_combine (new_permitted, working); + new_permitted = cap_combine(new_permitted, working); if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset (new_permitted, current->cap_permitted)) { @@ -581,6 +574,23 @@ int cap_task_kill(struct task_struct *p, struct siginfo *info, return -EPERM; } + +/* + * called from kernel/sys.c for prctl(PR_CABSET_DROP) + * done without task_capability_lock() because it introduces + * no new races - i.e. only another task doing capget() on + * this task could get inconsistent info. There can be no + * racing writer bc a task can only change its own caps. + */ +long cap_prctl_drop(unsigned long cap) +{ + if (!capable(CAP_SETPCAP)) + return -EPERM; + if (!cap_valid(cap)) + return -EINVAL; + cap_lower(current->cap_bset, cap); + return 0; +} #else int cap_task_setscheduler (struct task_struct *p, int policy, struct sched_param *lp)