#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
-#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
+#include <linux/magic.h>
#include "pnode.h"
#include "internal.h"
}
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+ /* Don't allow unprivileged users to change mount flags */
+ if (flag & CL_UNPRIVILEGED) {
+ mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
+
+ if (mnt->mnt.mnt_flags & MNT_READONLY)
+ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+
+ if (mnt->mnt.mnt_flags & MNT_NODEV)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
+
+ if (mnt->mnt.mnt_flags & MNT_NOSUID)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
+
+ if (mnt->mnt.mnt_flags & MNT_NOEXEC)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
+ }
+
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
static LIST_HEAD(unmounted); /* protected by namespace_sem */
-void release_mounts(struct list_head *head)
+static void namespace_unlock(void)
{
struct mount *mnt;
- while (!list_empty(head)) {
- mnt = list_first_entry(head, struct mount, mnt_hash);
+ LIST_HEAD(head);
+
+ if (likely(list_empty(&unmounted))) {
+ up_write(&namespace_sem);
+ return;
+ }
+
+ list_splice_init(&unmounted, &head);
+ up_write(&namespace_sem);
+
+ while (!list_empty(&head)) {
+ mnt = list_first_entry(&head, struct mount, mnt_hash);
list_del_init(&mnt->mnt_hash);
if (mnt_has_parent(mnt)) {
struct dentry *dentry;
}
}
-static void namespace_unlock(void)
+static inline void namespace_lock(void)
{
- LIST_HEAD(head);
- list_splice_init(&unmounted, &head);
- up_write(&namespace_sem);
- release_mounts(&head);
+ down_write(&namespace_sem);
}
/*
* vfsmount lock must be held for write
* namespace_sem must be held for write
*/
-void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
+void umount_tree(struct mount *mnt, int propagate)
{
LIST_HEAD(tmp_list);
struct mount *p;
}
change_mnt_propagation(p, MS_PRIVATE);
}
- list_splice(&tmp_list, kill);
+ list_splice(&tmp_list, &unmounted);
}
static void shrink_submounts(struct mount *mnt);
{
struct super_block *sb = mnt->mnt.mnt_sb;
int retval;
- LIST_HEAD(umount_list);
retval = security_sb_umount(&mnt->mnt, flags);
if (retval)
* Special case for "unmounting" root ...
* we just try to remount it readonly.
*/
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
down_write(&sb->s_umount);
if (!(sb->s_flags & MS_RDONLY))
retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
return retval;
}
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
event++;
retval = -EBUSY;
if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 1, &unmounted);
+ umount_tree(mnt, 1);
retval = 0;
}
br_write_unlock(&vfsmount_lock);
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
+ retval = -EPERM;
+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
* mount namespace loop?
*/
struct inode *inode = path->dentry->d_inode;
- struct proc_inode *ei;
+ struct proc_ns *ei;
struct mnt_namespace *mnt_ns;
if (!proc_ns_inode(inode))
return false;
- ei = PROC_I(inode);
+ ei = get_proc_ns(inode);
if (ei->ns_ops != &mntns_operations)
return false;
return res;
out:
if (res) {
- LIST_HEAD(umount_list);
br_write_lock(&vfsmount_lock);
- umount_tree(res, 0, &umount_list);
+ umount_tree(res, 0);
br_write_unlock(&vfsmount_lock);
- release_mounts(&umount_list);
}
return q;
}
struct vfsmount *collect_mounts(struct path *path)
{
struct mount *tree;
- down_write(&namespace_sem);
+ namespace_lock();
tree = copy_tree(real_mount(path->mnt), path->dentry,
CL_COPY_ALL | CL_PRIVATE);
- up_write(&namespace_sem);
+ namespace_unlock();
if (IS_ERR(tree))
- return NULL;
+ return ERR_CAST(tree);
return &tree->mnt;
}
void drop_collected_mounts(struct vfsmount *mnt)
{
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
- umount_tree(real_mount(mnt), 0, &unmounted);
+ umount_tree(real_mount(mnt), 0);
br_write_unlock(&vfsmount_lock);
namespace_unlock();
}
mutex_unlock(&dentry->d_inode->i_mutex);
return ERR_PTR(-ENOENT);
}
- down_write(&namespace_sem);
+ namespace_lock();
mnt = lookup_mnt(path);
if (likely(!mnt)) {
struct mountpoint *mp = new_mountpoint(dentry);
if (IS_ERR(mp)) {
- up_write(&namespace_sem);
+ namespace_unlock();
mutex_unlock(&dentry->d_inode->i_mutex);
return mp;
}
return mp;
}
- up_write(&namespace_sem);
+ namespace_unlock();
mutex_unlock(&path->dentry->d_inode->i_mutex);
path_put(path);
path->mnt = mnt;
{
struct dentry *dentry = where->m_dentry;
put_mountpoint(where);
- up_write(&namespace_sem);
+ namespace_unlock();
mutex_unlock(&dentry->d_inode->i_mutex);
}
if (!type)
return -EINVAL;
- down_write(&namespace_sem);
+ namespace_lock();
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
br_write_unlock(&vfsmount_lock);
out_unlock:
- up_write(&namespace_sem);
+ namespace_unlock();
return err;
}
static int do_loopback(struct path *path, const char *old_name,
int recurse)
{
- LIST_HEAD(umount_list);
struct path old_path;
struct mount *mnt = NULL, *old, *parent;
struct mountpoint *mp;
err = graft_tree(mnt, parent, mp);
if (err) {
br_write_lock(&vfsmount_lock);
- umount_tree(mnt, 0, &umount_list);
+ umount_tree(mnt, 0);
br_write_unlock(&vfsmount_lock);
}
out2:
unlock_mount(mp);
- release_mounts(&umount_list);
out:
path_put(&old_path);
return err;
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
+ /* Don't allow changing of locked mnt flags.
+ *
+ * No locks need to be held here while testing the various
+ * MNT_LOCK flags because those flags can never be cleared
+ * once they are set.
+ */
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV)) {
+ /* Was the nodev implicitly added in mount? */
+ if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ mnt_flags |= MNT_NODEV;
+ } else {
+ return -EPERM;
+ }
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
+ ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+ return -EPERM;
+ }
+
err = security_sb_remount(sb, data);
if (err)
return err;
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
br_write_lock(&vfsmount_lock);
- mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
mnt->mnt.mnt_flags = mnt_flags;
br_write_unlock(&vfsmount_lock);
}
*/
if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
flags |= MS_NODEV;
- mnt_flags |= MNT_NODEV;
+ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
}
fail:
/* remove m from any expiration list it may be on */
if (!list_empty(&mnt->mnt_expire)) {
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
list_del_init(&mnt->mnt_expire);
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
+ namespace_unlock();
}
mntput(m);
mntput(m);
*/
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
+ namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);
if (list_empty(mounts))
return;
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
while (!list_empty(&graveyard)) {
mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
touch_mnt_namespace(mnt->mnt_ns);
- umount_tree(mnt, 1, &unmounted);
+ umount_tree(mnt, 1);
}
br_write_unlock(&vfsmount_lock);
namespace_unlock();
m = list_first_entry(&graveyard, struct mount,
mnt_expire);
touch_mnt_namespace(m->mnt_ns);
- umount_tree(m, 1, &unmounted);
+ umount_tree(m, 1);
}
}
}
retval = security_sb_mount(dev_name, &path,
type_page, flags, data_page);
+ if (!retval && !may_mount())
+ retval = -EPERM;
if (retval)
goto dput_out;
- if (!may_mount())
- return -EPERM;
-
/* Default to relatime unless overriden */
if (!(flags & MS_NOATIME))
mnt_flags |= MNT_RELATIME;
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ /* The default atime for remount is preservation */
+ if ((flags & MS_REMOUNT) &&
+ ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
+ MS_STRICTATIME)) == 0)) {
+ mnt_flags &= ~MNT_ATIME_MASK;
+ mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
+ }
+
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
MS_STRICTATIME);
if (IS_ERR(new_ns))
return new_ns;
- down_write(&namespace_sem);
+ namespace_lock();
/* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns)
- copy_flags |= CL_SHARED_TO_SLAVE;
+ copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
- up_write(&namespace_sem);
+ namespace_unlock();
free_mnt_ns(new_ns);
return ERR_CAST(new);
}
p = next_mnt(p, old);
q = next_mnt(q, new);
}
- up_write(&namespace_sem);
+ namespace_unlock();
if (rootmnt)
mntput(rootmnt);
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
new_ns->root = mnt;
- list_add(&new_ns->list, &mnt->mnt_list);
+ list_add(&mnt->mnt_list, &new_ns->list);
} else {
mntput(m);
}
/* make sure we can reach put_old from new_root */
if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
+ /* make certain new is below the root */
+ if (!is_path_reachable(new_mnt, new.dentry, &root))
+ goto out4;
root_mp->m_count++; /* pin it so it won't go away */
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
{
if (!atomic_dec_and_test(&ns->count))
return;
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
- umount_tree(ns->root, 0, &unmounted);
+ umount_tree(ns->root, 0);
br_write_unlock(&vfsmount_lock);
namespace_unlock();
free_mnt_ns(ns);
return check_mnt(real_mount(mnt));
}
+bool current_chrooted(void)
+{
+ /* Does the current process have a non-standard root */
+ struct path ns_root;
+ struct path fs_root;
+ bool chrooted;
+
+ /* Find the namespace root */
+ ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt;
+ ns_root.dentry = ns_root.mnt->mnt_root;
+ path_get(&ns_root);
+ while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
+ ;
+
+ get_fs_root(current->fs, &fs_root);
+
+ chrooted = !path_equal(&fs_root, &ns_root);
+
+ path_put(&fs_root);
+ path_put(&ns_root);
+
+ return chrooted;
+}
+
+void update_mnt_policy(struct user_namespace *userns)
+{
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ struct mount *mnt;
+
+ down_read(&namespace_sem);
+ list_for_each_entry(mnt, &ns->list, mnt_list) {
+ switch (mnt->mnt.mnt_sb->s_magic) {
+ case SYSFS_MAGIC:
+ userns->may_mount_sysfs = true;
+ break;
+ case PROC_SUPER_MAGIC:
+ userns->may_mount_proc = true;
+ break;
+ }
+ if (userns->may_mount_sysfs && userns->may_mount_proc)
+ break;
+ }
+ up_read(&namespace_sem);
+}
+
static void *mntns_get(struct task_struct *task)
{
struct mnt_namespace *ns = NULL;