lib/proportions.c

   1 /*
   2  * Floating proportions
   3  *
   4  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
   5  *
   6  * Description:
   7  *
   8  * The floating proportion is a time derivative with an exponentially decaying
   9  * history:
  10  *
  11  *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
  12  *
  13  * Where j is an element from {prop_local}, x_{j} is j's number of events,
  14  * and i the time period over which the differential is taken. So d/dt_{-i} is
  15  * the differential over the i-th last period.
  16  *
  17  * The decaying history gives smooth transitions. The time differential carries
  18  * the notion of speed.
  19  *
  20  * The denominator is 2^(1+i) because we want the series to be normalised, ie.
  21  *
  22  *   \Sum_{i=0} 1/2^(1+i) = 1
  23  *
  24  * Further more, if we measure time (t) in the same events as x; so that:
  25  *
  26  *   t = \Sum_{j} x_{j}
  27  *
  28  * we get that:
  29  *
  30  *   \Sum_{j} p_{j} = 1
  31  *
  32  * Writing this in an iterative fashion we get (dropping the 'd's):
  33  *
  34  *   if (++x_{j}, ++t > period)
  35  *     t /= 2;
  36  *     for_each (j)
  37  *       x_{j} /= 2;
  38  *
  39  * so that:
  40  *
  41  *   p_{j} = x_{j} / t;
  42  *
  43  * We optimize away the '/= 2' for the global time delta by noting that:
  44  *
  45  *   if (++t > period) t /= 2:
  46  *
  47  * Can be approximated by:
  48  *
  49  *   period/2 + (++t % period/2)
  50  *
  51  * [ Furthermore, when we choose period to be 2^n it can be written in terms of
  52  *   binary operations and wraparound artefacts disappear. ]
  53  *
  54  * Also note that this yields a natural counter of the elapsed periods:
  55  *
  56  *   c = t / (period/2)
  57  *
  58  * [ Its monotonic increasing property can be applied to mitigate the wrap-
  59  *   around issue. ]
  60  *
  61  * This allows us to do away with the loop over all prop_locals on each period
  62  * expiration. By remembering the period count under which it was last accessed
  63  * as c_{j}, we can obtain the number of 'missed' cycles from:
  64  *
  65  *   c - c_{j}
  66  *
  67  * We can then lazily catch up to the global period count every time we are
  68  * going to use x_{j}, by doing:
  69  *
  70  *   x_{j} /= 2^(c - c_{j}), c_{j} = c
  71  */
  72
  73 #include <linux/proportions.h>
  74 #include <linux/rcupdate.h>
  75
  76 int prop_descriptor_init(struct prop_descriptor *pd, int shift)
  77 {
  78         int err;
  79
  80         if (shift > PROP_MAX_SHIFT)
  81                 shift = PROP_MAX_SHIFT;
  82
  83         pd->index = 0;
  84         pd->pg[0].shift = shift;
  85         mutex_init(&pd->mutex);
  86         err = percpu_counter_init_irq(&pd->pg[0].events, 0);
  87         if (err)
  88                 goto out;
  89
  90         err = percpu_counter_init_irq(&pd->pg[1].events, 0);
  91         if (err)
  92                 percpu_counter_destroy(&pd->pg[0].events);
  93
  94 out:
  95         return err;
  96 }
  97
  98 /*
  99  * We have two copies, and flip between them to make it seem like an atomic
 100  * update. The update is not really atomic wrt the events counter, but
 101  * it is internally consistent with the bit layout depending on shift.
 102  *
 103  * We copy the events count, move the bits around and flip the index.
 104  */
 105 void prop_change_shift(struct prop_descriptor *pd, int shift)
 106 {
 107         int index;
 108         int offset;
 109         u64 events;
 110         unsigned long flags;
 111
 112         if (shift > PROP_MAX_SHIFT)
 113                 shift = PROP_MAX_SHIFT;
 114
 115         mutex_lock(&pd->mutex);
 116
 117         index = pd->index ^ 1;
 118         offset = pd->pg[pd->index].shift - shift;
 119         if (!offset)
 120                 goto out;
 121
 122         pd->pg[index].shift = shift;
 123
 124         local_irq_save(flags);
 125         events = percpu_counter_sum(&pd->pg[pd->index].events);
 126         if (offset < 0)
 127                 events <<= -offset;
 128         else
 129                 events >>= offset;
 130         percpu_counter_set(&pd->pg[index].events, events);
 131
 132         /*
 133          * ensure the new pg is fully written before the switch
 134          */
 135         smp_wmb();
 136         pd->index = index;
 137         local_irq_restore(flags);
 138
 139         synchronize_rcu();
 140
 141 out:
 142         mutex_unlock(&pd->mutex);
 143 }
 144
 145 /*
 146  * wrap the access to the data in an rcu_read_lock() section;
 147  * this is used to track the active references.
 148  */
 149 static struct prop_global *prop_get_global(struct prop_descriptor *pd)
 150 {
 151         int index;
 152
 153         rcu_read_lock();
 154         index = pd->index;
 155         /*
 156          * match the wmb from vcd_flip()
 157          */
 158         smp_rmb();
 159         return &pd->pg[index];
 160 }
 161
 162 static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
 163 {
 164         rcu_read_unlock();
 165 }
 166
 167 static void
 168 prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
 169 {
 170         int offset = *pl_shift - new_shift;
 171
 172         if (!offset)
 173                 return;
 174
 175         if (offset < 0)
 176                 *pl_period <<= -offset;
 177         else
 178                 *pl_period >>= offset;
 179
 180         *pl_shift = new_shift;
 181 }
 182
 183 /*
 184  * PERCPU
 185  */
 186
 187 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 188
 189 int prop_local_init_percpu(struct prop_local_percpu *pl)
 190 {
 191         spin_lock_init(&pl->lock);
 192         pl->shift = 0;
 193         pl->period = 0;
 194         return percpu_counter_init_irq(&pl->events, 0);
 195 }
 196
 197 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
 198 {
 199         percpu_counter_destroy(&pl->events);
 200 }
 201
 202 /*
 203  * Catch up with missed period expirations.
 204  *
 205  *   until (c_{j} == c)
 206  *     x_{j} -= x_{j}/2;
 207  *     c_{j}++;
 208  */
 209 static
 210 void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
 211 {
 212         unsigned long period = 1UL << (pg->shift - 1);
 213         unsigned long period_mask = ~(period - 1);
 214         unsigned long global_period;
 215         unsigned long flags;
 216
 217         global_period = percpu_counter_read(&pg->events);
 218         global_period &= period_mask;
 219
 220         /*
 221          * Fast path - check if the local and global period count still match
 222          * outside of the lock.
 223          */
 224         if (pl->period == global_period)
 225                 return;
 226
 227         spin_lock_irqsave(&pl->lock, flags);
 228         prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
 229
 230         /*
 231          * For each missed period, we half the local counter.
 232          * basically:
 233          *   pl->events >> (global_period - pl->period);
 234          */
 235         period = (global_period - pl->period) >> (pg->shift - 1);
 236         if (period < BITS_PER_LONG) {
 237                 s64 val = percpu_counter_read(&pl->events);
 238
 239                 if (val < (nr_cpu_ids * PROP_BATCH))
 240                         val = percpu_counter_sum(&pl->events);
 241
 242                 __percpu_counter_add(&pl->events, -val + (val >> period),
 243                                         PROP_BATCH);
 244         } else
 245                 percpu_counter_set(&pl->events, 0);
 246
 247         pl->period = global_period;
 248         spin_unlock_irqrestore(&pl->lock, flags);
 249 }
 250
 251 /*
 252  *   ++x_{j}, ++t
 253  */
 254 void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
 255 {
 256         struct prop_global *pg = prop_get_global(pd);
 257
 258         prop_norm_percpu(pg, pl);
 259         __percpu_counter_add(&pl->events, 1, PROP_BATCH);
 260         percpu_counter_add(&pg->events, 1);
 261         prop_put_global(pd, pg);
 262 }
 263
 264 /*
 265  * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 266  * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 267  */
 268 void __prop_inc_percpu_max(struct prop_descriptor *pd,
 269                            struct prop_local_percpu *pl, long frac)
 270 {
 271         struct prop_global *pg = prop_get_global(pd);
 272
 273         prop_norm_percpu(pg, pl);
 274
 275         if (unlikely(frac != PROP_FRAC_BASE)) {
 276                 unsigned long period_2 = 1UL << (pg->shift - 1);
 277                 unsigned long counter_mask = period_2 - 1;
 278                 unsigned long global_count;
 279                 long numerator, denominator;
 280
 281                 numerator = percpu_counter_read_positive(&pl->events);
 282                 global_count = percpu_counter_read(&pg->events);
 283                 denominator = period_2 + (global_count & counter_mask);
 284
 285                 if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
 286                         goto out_put;
 287         }
 288
 289         percpu_counter_add(&pl->events, 1);
 290         percpu_counter_add(&pg->events, 1);
 291
 292 out_put:
 293         prop_put_global(pd, pg);
 294 }
 295
 296 /*
 297  * Obtain a fraction of this proportion
 298  *
 299  *   p_{j} = x_{j} / (period/2 + t % period/2)
 300  */
 301 void prop_fraction_percpu(struct prop_descriptor *pd,
 302                 struct prop_local_percpu *pl,
 303                 long *numerator, long *denominator)
 304 {
 305         struct prop_global *pg = prop_get_global(pd);
 306         unsigned long period_2 = 1UL << (pg->shift - 1);
 307         unsigned long counter_mask = period_2 - 1;
 308         unsigned long global_count;
 309
 310         prop_norm_percpu(pg, pl);
 311         *numerator = percpu_counter_read_positive(&pl->events);
 312
 313         global_count = percpu_counter_read(&pg->events);
 314         *denominator = period_2 + (global_count & counter_mask);
 315
 316         prop_put_global(pd, pg);
 317 }
 318
 319 /*
 320  * SINGLE
 321  */
 322
 323 int prop_local_init_single(struct prop_local_single *pl)
 324 {
 325         spin_lock_init(&pl->lock);
 326         pl->shift = 0;
 327         pl->period = 0;
 328         pl->events = 0;
 329         return 0;
 330 }
 331
 332 void prop_local_destroy_single(struct prop_local_single *pl)
 333 {
 334 }
 335
 336 /*
 337  * Catch up with missed period expirations.
 338  */
 339 static
 340 void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
 341 {
 342         unsigned long period = 1UL << (pg->shift - 1);
 343         unsigned long period_mask = ~(period - 1);
 344         unsigned long global_period;
 345         unsigned long flags;
 346
 347         global_period = percpu_counter_read(&pg->events);
 348         global_period &= period_mask;
 349
 350         /*
 351          * Fast path - check if the local and global period count still match
 352          * outside of the lock.
 353          */
 354         if (pl->period == global_period)
 355                 return;
 356
 357         spin_lock_irqsave(&pl->lock, flags);
 358         prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
 359         /*
 360          * For each missed period, we half the local counter.
 361          */
 362         period = (global_period - pl->period) >> (pg->shift - 1);
 363         if (likely(period < BITS_PER_LONG))
 364                 pl->events >>= period;
 365         else
 366                 pl->events = 0;
 367         pl->period = global_period;
 368         spin_unlock_irqrestore(&pl->lock, flags);
 369 }
 370
 371 /*
 372  *   ++x_{j}, ++t
 373  */
 374 void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
 375 {
 376         struct prop_global *pg = prop_get_global(pd);
 377
 378         prop_norm_single(pg, pl);
 379         pl->events++;
 380         percpu_counter_add(&pg->events, 1);
 381         prop_put_global(pd, pg);
 382 }
 383
 384 /*
 385  * Obtain a fraction of this proportion
 386  *
 387  *   p_{j} = x_{j} / (period/2 + t % period/2)
 388  */
 389 void prop_fraction_single(struct prop_descriptor *pd,
 390                 struct prop_local_single *pl,
 391                 long *numerator, long *denominator)
 392 {
 393         struct prop_global *pg = prop_get_global(pd);
 394         unsigned long period_2 = 1UL << (pg->shift - 1);
 395         unsigned long counter_mask = period_2 - 1;
 396         unsigned long global_count;
 397
 398         prop_norm_single(pg, pl);
 399         *numerator = pl->events;
 400
 401         global_count = percpu_counter_read(&pg->events);
 402         *denominator = period_2 + (global_count & counter_mask);
 403
 404         prop_put_global(pd, pg);
 405 }