ceph: drop support for preferred_osd pgs
Sage Weil [Mon, 7 May 2012 22:33:36 +0000 (15:33 -0700)]
This was an ill-conceived feature that has been removed from Ceph.  Remove
support for it from the kernel client as well, and do so gracefully:

 - reject attempts to specify a preferred_osd via the ioctl
 - stop exposing this information via virtual xattrs
 - always fill in -1 for requests, in case we talk to an older server
 - don't calculate preferred_osd placements/pgids

Reviewed-by: Alex Elder <elder@inktank.com>
Signed-off-by: Sage Weil <sage@inktank.com>

drivers/block/rbd.c
fs/ceph/file.c
fs/ceph/ioctl.c
fs/ceph/xattr.c
include/linux/ceph/ceph_fs.h
include/linux/ceph/osdmap.h
net/ceph/osdmap.c

index c1f7701..a67fa63 100644 (file)
@@ -935,7 +935,6 @@ static int rbd_do_request(struct request *rq,
        layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
        layout->fl_stripe_count = cpu_to_le32(1);
        layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-       layout->fl_pg_preferred = cpu_to_le32(-1);
        layout->fl_pg_pool = cpu_to_le32(dev->poolid);
        ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
                                req, ops);
index ed72428..988d4f3 100644 (file)
@@ -54,7 +54,6 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
        req->r_fmode = ceph_flags_to_mode(flags);
        req->r_args.open.flags = cpu_to_le32(flags);
        req->r_args.open.mode = cpu_to_le32(create_mode);
-       req->r_args.open.preferred = cpu_to_le32(-1);
 out:
        return req;
 }
index 790914a..4feab52 100644 (file)
@@ -26,8 +26,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
                l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
                l.object_size = ceph_file_layout_object_size(ci->i_layout);
                l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
-               l.preferred_osd =
-                       (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
+               l.preferred_osd = (s32)-1;
                if (copy_to_user(arg, &l, sizeof(l)))
                        return -EFAULT;
        }
@@ -49,6 +48,10 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
        if (copy_from_user(&l, arg, sizeof(l)))
                return -EFAULT;
 
+       /* preferred_osd is no longer supported */
+       if (l.preferred_osd != -1)
+               return -EINVAL;
+
        /* validate changed params against current layout */
        err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
        if (!err) {
@@ -56,8 +59,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
                nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
                nl.object_size = ceph_file_layout_object_size(ci->i_layout);
                nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
-               nl.preferred_osd =
-                               (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
        } else
                return err;
 
@@ -69,8 +70,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
                nl.object_size = l.object_size;
        if (l.data_pool)
                nl.data_pool = l.data_pool;
-       if (l.preferred_osd)
-               nl.preferred_osd = l.preferred_osd;
 
        if ((nl.object_size & ~PAGE_MASK) ||
            (nl.stripe_unit & ~PAGE_MASK) ||
@@ -106,8 +105,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
        req->r_args.setlayout.layout.fl_object_size =
                cpu_to_le32(l.object_size);
        req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
-       req->r_args.setlayout.layout.fl_pg_preferred =
-               cpu_to_le32(l.preferred_osd);
 
        parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
@@ -171,8 +168,6 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
                        cpu_to_le32(l.object_size);
        req->r_args.setlayout.layout.fl_pg_pool =
                        cpu_to_le32(l.data_pool);
-       req->r_args.setlayout.layout.fl_pg_preferred =
-                       cpu_to_le32(l.preferred_osd);
 
        err = ceph_mdsc_do_request(mdsc, inode, req);
        ceph_mdsc_put_request(req);
index 35b8633..785cb30 100644 (file)
@@ -118,15 +118,6 @@ static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
                (unsigned long long)ceph_file_layout_su(ci->i_layout),
                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
                (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-
-       if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
-               val += ret;
-               size -= ret;
-               ret += snprintf(val, size, "preferred_osd=%lld\n",
-                           (unsigned long long)ceph_file_layout_pg_preferred(
-                                   ci->i_layout));
-       }
-
        return ret;
 }
 
index b8c6069..e81ab30 100644 (file)
@@ -65,7 +65,7 @@ struct ceph_file_layout {
        __le32 fl_object_stripe_unit;  /* UNUSED.  for per-object parity, if any */
 
        /* object -> pg layout */
-       __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
+       __le32 fl_unused;       /* unused; used to be preferred primary (-1) */
        __le32 fl_pg_pool;      /* namespace, crush ruleset, rep level */
 } __attribute__ ((packed));
 
@@ -384,7 +384,7 @@ union ceph_mds_request_args {
                __le32 stripe_count;         /* ... */
                __le32 object_size;
                __le32 file_replication;
-               __le32 preferred;
+               __le32 unused;               /* used to be preferred osd */
        } __attribute__ ((packed)) open;
        struct {
                __le32 flags;
index ba4c205..311ef8d 100644 (file)
@@ -65,8 +65,6 @@ struct ceph_osdmap {
 #define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
 #define ceph_file_layout_object_su(l) \
        ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
-#define ceph_file_layout_pg_preferred(l) \
-       ((__s32)le32_to_cpu((l).fl_pg_preferred))
 #define ceph_file_layout_pg_pool(l) \
        ((__s32)le32_to_cpu((l).fl_pg_pool))
 
index 29ad46e..7d39f3c 100644 (file)
@@ -1000,7 +1000,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 {
        unsigned num, num_mask;
        struct ceph_pg pgid;
-       s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
        int poolid = le32_to_cpu(fl->fl_pg_pool);
        struct ceph_pg_pool_info *pool;
        unsigned ps;
@@ -1011,23 +1010,13 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
        if (!pool)
                return -EIO;
        ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
-       if (preferred >= 0) {
-               ps += preferred;
-               num = le32_to_cpu(pool->v.lpg_num);
-               num_mask = pool->lpg_num_mask;
-       } else {
-               num = le32_to_cpu(pool->v.pg_num);
-               num_mask = pool->pg_num_mask;
-       }
+       num = le32_to_cpu(pool->v.pg_num);
+       num_mask = pool->pg_num_mask;
 
        pgid.ps = cpu_to_le16(ps);
-       pgid.preferred = cpu_to_le16(preferred);
+       pgid.preferred = cpu_to_le16(-1);
        pgid.pool = fl->fl_pg_pool;
-       if (preferred >= 0)
-               dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
-                    (int)preferred);
-       else
-               dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
+       dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
 
        ol->ol_pgid = pgid;
        ol->ol_stripe_unit = fl->fl_object_stripe_unit;
@@ -1046,23 +1035,17 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
        struct ceph_pg_pool_info *pool;
        int ruleno;
        unsigned poolid, ps, pps, t;
-       int preferred;
 
        poolid = le32_to_cpu(pgid.pool);
        ps = le16_to_cpu(pgid.ps);
-       preferred = (s16)le16_to_cpu(pgid.preferred);
 
        pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
        if (!pool)
                return NULL;
 
        /* pg_temp? */
-       if (preferred >= 0)
-               t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
-                                   pool->lpgp_num_mask);
-       else
-               t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
-                                   pool->pgp_num_mask);
+       t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
+                           pool->pgp_num_mask);
        pgid.ps = cpu_to_le16(t);
        pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
        if (pg) {
@@ -1080,23 +1063,13 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
                return NULL;
        }
 
-       /* don't forcefeed bad device ids to crush */
-       if (preferred >= osdmap->max_osd ||
-           preferred >= osdmap->crush->max_devices)
-               preferred = -1;
-
-       if (preferred >= 0)
-               pps = ceph_stable_mod(ps,
-                                     le32_to_cpu(pool->v.lpgp_num),
-                                     pool->lpgp_num_mask);
-       else
-               pps = ceph_stable_mod(ps,
-                                     le32_to_cpu(pool->v.pgp_num),
-                                     pool->pgp_num_mask);
+       pps = ceph_stable_mod(ps,
+                             le32_to_cpu(pool->v.pgp_num),
+                             pool->pgp_num_mask);
        pps += poolid;
        *num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
                             min_t(int, pool->v.size, *num),
-                            preferred, osdmap->osd_weight);
+                            -1, osdmap->osd_weight);
        return osds;
 }