7663a2a9f130430c9ba80a8123d519ca9a1fbad9
[linux-2.6.git] / drivers / infiniband / hw / ehca / ehca_mrmw.c
1 /*
2  *  IBM eServer eHCA Infiniband device driver for Linux on POWER
3  *
4  *  MR/MW functions
5  *
6  *  Authors: Dietmar Decker <ddecker@de.ibm.com>
7  *           Christoph Raisch <raisch@de.ibm.com>
8  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9  *
10  *  Copyright (c) 2005 IBM Corporation
11  *
12  *  All rights reserved.
13  *
14  *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
15  *  BSD.
16  *
17  * OpenIB BSD License
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are met:
21  *
22  * Redistributions of source code must retain the above copyright notice, this
23  * list of conditions and the following disclaimer.
24  *
25  * Redistributions in binary form must reproduce the above copyright notice,
26  * this list of conditions and the following disclaimer in the documentation
27  * and/or other materials
28  * provided with the distribution.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40  * POSSIBILITY OF SUCH DAMAGE.
41  */
42
43 #include <rdma/ib_umem.h>
44
45 #include "ehca_iverbs.h"
46 #include "ehca_mrmw.h"
47 #include "hcp_if.h"
48 #include "hipz_hw.h"
49
/*
 * Number of chunk_size-sized chunks needed to cover length (division
 * rounding up).  Every use of an argument is parenthesized so that
 * expressions such as "1 << shift" can be passed safely.  chunk_size is
 * expanded twice and must therefore be free of side effects.
 */
#define NUM_CHUNKS(length, chunk_size) \
	(((length) + ((chunk_size) - 1)) / (chunk_size))
52
/* upper bound on rpages passed per register_rpages hcall */
#define MAX_RPAGES 512

/*
 * DMEM toleration management
 */

/* memory section geometry, taken from the kernel's SECTION_SIZE_BITS */
#define EHCA_SECTSHIFT		SECTION_SIZE_BITS
#define EHCA_SECTSIZE		(1UL << EHCA_SECTSHIFT)

/* huge page geometry: 2^34 bytes, plus the matching pfn mask */
#define EHCA_HUGEPAGESHIFT	34
#define EHCA_HUGEPAGE_SIZE	(1UL << EHCA_HUGEPAGESHIFT)
#define EHCA_HUGEPAGE_PFN_MASK	((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)

/* all-ones marker value */
#define EHCA_INVAL_ADDR		0xFFFFFFFFFFFFFFFFULL

/* index geometry of the map tables: 8k entries in a 64k block */
#define EHCA_DIR_INDEX_SHIFT	13
#define EHCA_TOP_INDEX_SHIFT	(EHCA_DIR_INDEX_SHIFT * 2)
#define EHCA_MAP_ENTRIES	(1 << EHCA_DIR_INDEX_SHIFT)
#define EHCA_INDEX_MASK		(EHCA_MAP_ENTRIES - 1)

/* byte size of each map level; currently fixed */
#define EHCA_TOP_MAP_SIZE	(0x10000)
#define EHCA_DIR_MAP_SIZE	(0x10000)
#define EHCA_ENT_MAP_SIZE	(0x10000)
70
71 static unsigned long ehca_mr_len;
72
73 /*
74  * Memory map data structures
75  */
76 struct ehca_dir_bmap {
77         u64 ent[EHCA_MAP_ENTRIES];
78 };
79 struct ehca_top_bmap {
80         struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
81 };
82 struct ehca_bmap {
83         struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
84 };
85
86 static struct ehca_bmap *ehca_bmap;
87
88 static struct kmem_cache *mr_cache;
89 static struct kmem_cache *mw_cache;
90
/* log2 of each MR page size listed in enum ehca_mr_pgsize */
#define EHCA_MR_PGSHIFT4K  12
#define EHCA_MR_PGSHIFT64K 16
#define EHCA_MR_PGSHIFT1M  20
#define EHCA_MR_PGSHIFT16M 24

/* MR page sizes in bytes, each a power of two of the shift above */
enum ehca_mr_pgsize {
	EHCA_MR_PGSIZE4K  = 1L << EHCA_MR_PGSHIFT4K,
	EHCA_MR_PGSIZE64K = 1L << EHCA_MR_PGSHIFT64K,
	EHCA_MR_PGSIZE1M  = 1L << EHCA_MR_PGSHIFT1M,
	EHCA_MR_PGSIZE16M = 1L << EHCA_MR_PGSHIFT16M
};
102
103 static u64 ehca_map_vaddr(void *caddr);
104
105 static u32 ehca_encode_hwpage_size(u32 pgsize)
106 {
107         int log = ilog2(pgsize);
108         WARN_ON(log < 12 || log > 24 || log & 3);
109         return (log - 12) / 4;
110 }
111
112 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
113 {
114         return 1UL << ilog2(shca->hca_cap_mr_pgsize);
115 }
116
117 static struct ehca_mr *ehca_mr_new(void)
118 {
119         struct ehca_mr *me;
120
121         me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
122         if (me)
123                 spin_lock_init(&me->mrlock);
124         else
125                 ehca_gen_err("alloc failed");
126
127         return me;
128 }
129
130 static void ehca_mr_delete(struct ehca_mr *me)
131 {
132         kmem_cache_free(mr_cache, me);
133 }
134
135 static struct ehca_mw *ehca_mw_new(void)
136 {
137         struct ehca_mw *me;
138
139         me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
140         if (me)
141                 spin_lock_init(&me->mwlock);
142         else
143                 ehca_gen_err("alloc failed");
144
145         return me;
146 }
147
148 static void ehca_mw_delete(struct ehca_mw *me)
149 {
150         kmem_cache_free(mw_cache, me);
151 }
152
153 /*----------------------------------------------------------------------*/
154
155 struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
156 {
157         struct ib_mr *ib_mr;
158         int ret;
159         struct ehca_mr *e_maxmr;
160         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
161         struct ehca_shca *shca =
162                 container_of(pd->device, struct ehca_shca, ib_device);
163
164         if (shca->maxmr) {
165                 e_maxmr = ehca_mr_new();
166                 if (!e_maxmr) {
167                         ehca_err(&shca->ib_device, "out of memory");
168                         ib_mr = ERR_PTR(-ENOMEM);
169                         goto get_dma_mr_exit0;
170                 }
171
172                 ret = ehca_reg_maxmr(shca, e_maxmr,
173                                      (void *)ehca_map_vaddr((void *)KERNELBASE),
174                                      mr_access_flags, e_pd,
175                                      &e_maxmr->ib.ib_mr.lkey,
176                                      &e_maxmr->ib.ib_mr.rkey);
177                 if (ret) {
178                         ehca_mr_delete(e_maxmr);
179                         ib_mr = ERR_PTR(ret);
180                         goto get_dma_mr_exit0;
181                 }
182                 ib_mr = &e_maxmr->ib.ib_mr;
183         } else {
184                 ehca_err(&shca->ib_device, "no internal max-MR exist!");
185                 ib_mr = ERR_PTR(-EINVAL);
186                 goto get_dma_mr_exit0;
187         }
188
189 get_dma_mr_exit0:
190         if (IS_ERR(ib_mr))
191                 ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
192                          PTR_ERR(ib_mr), pd, mr_access_flags);
193         return ib_mr;
194 } /* end ehca_get_dma_mr() */
195
196 /*----------------------------------------------------------------------*/
197
198 struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
199                                struct ib_phys_buf *phys_buf_array,
200                                int num_phys_buf,
201                                int mr_access_flags,
202                                u64 *iova_start)
203 {
204         struct ib_mr *ib_mr;
205         int ret;
206         struct ehca_mr *e_mr;
207         struct ehca_shca *shca =
208                 container_of(pd->device, struct ehca_shca, ib_device);
209         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
210
211         u64 size;
212
213         if ((num_phys_buf <= 0) || !phys_buf_array) {
214                 ehca_err(pd->device, "bad input values: num_phys_buf=%x "
215                          "phys_buf_array=%p", num_phys_buf, phys_buf_array);
216                 ib_mr = ERR_PTR(-EINVAL);
217                 goto reg_phys_mr_exit0;
218         }
219         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
220              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
221             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
222              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
223                 /*
224                  * Remote Write Access requires Local Write Access
225                  * Remote Atomic Access requires Local Write Access
226                  */
227                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
228                          mr_access_flags);
229                 ib_mr = ERR_PTR(-EINVAL);
230                 goto reg_phys_mr_exit0;
231         }
232
233         /* check physical buffer list and calculate size */
234         ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
235                                             iova_start, &size);
236         if (ret) {
237                 ib_mr = ERR_PTR(ret);
238                 goto reg_phys_mr_exit0;
239         }
240         if ((size == 0) ||
241             (((u64)iova_start + size) < (u64)iova_start)) {
242                 ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
243                          size, iova_start);
244                 ib_mr = ERR_PTR(-EINVAL);
245                 goto reg_phys_mr_exit0;
246         }
247
248         e_mr = ehca_mr_new();
249         if (!e_mr) {
250                 ehca_err(pd->device, "out of memory");
251                 ib_mr = ERR_PTR(-ENOMEM);
252                 goto reg_phys_mr_exit0;
253         }
254
255         /* register MR on HCA */
256         if (ehca_mr_is_maxmr(size, iova_start)) {
257                 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
258                 ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
259                                      e_pd, &e_mr->ib.ib_mr.lkey,
260                                      &e_mr->ib.ib_mr.rkey);
261                 if (ret) {
262                         ib_mr = ERR_PTR(ret);
263                         goto reg_phys_mr_exit1;
264                 }
265         } else {
266                 struct ehca_mr_pginfo pginfo;
267                 u32 num_kpages;
268                 u32 num_hwpages;
269                 u64 hw_pgsize;
270
271                 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
272                                         PAGE_SIZE);
273                 /* for kernel space we try most possible pgsize */
274                 hw_pgsize = ehca_get_max_hwpage_size(shca);
275                 num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
276                                          hw_pgsize);
277                 memset(&pginfo, 0, sizeof(pginfo));
278                 pginfo.type = EHCA_MR_PGI_PHYS;
279                 pginfo.num_kpages = num_kpages;
280                 pginfo.hwpage_size = hw_pgsize;
281                 pginfo.num_hwpages = num_hwpages;
282                 pginfo.u.phy.num_phys_buf = num_phys_buf;
283                 pginfo.u.phy.phys_buf_array = phys_buf_array;
284                 pginfo.next_hwpage =
285                         ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
286
287                 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
288                                   e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
289                                   &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
290                 if (ret) {
291                         ib_mr = ERR_PTR(ret);
292                         goto reg_phys_mr_exit1;
293                 }
294         }
295
296         /* successful registration of all pages */
297         return &e_mr->ib.ib_mr;
298
299 reg_phys_mr_exit1:
300         ehca_mr_delete(e_mr);
301 reg_phys_mr_exit0:
302         if (IS_ERR(ib_mr))
303                 ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
304                          "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
305                          PTR_ERR(ib_mr), pd, phys_buf_array,
306                          num_phys_buf, mr_access_flags, iova_start);
307         return ib_mr;
308 } /* end ehca_reg_phys_mr() */
309
310 /*----------------------------------------------------------------------*/
311
312 struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
313                                u64 virt, int mr_access_flags,
314                                struct ib_udata *udata)
315 {
316         struct ib_mr *ib_mr;
317         struct ehca_mr *e_mr;
318         struct ehca_shca *shca =
319                 container_of(pd->device, struct ehca_shca, ib_device);
320         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
321         struct ehca_mr_pginfo pginfo;
322         int ret, page_shift;
323         u32 num_kpages;
324         u32 num_hwpages;
325         u64 hwpage_size;
326
327         if (!pd) {
328                 ehca_gen_err("bad pd=%p", pd);
329                 return ERR_PTR(-EFAULT);
330         }
331
332         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
333              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
334             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
335              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
336                 /*
337                  * Remote Write Access requires Local Write Access
338                  * Remote Atomic Access requires Local Write Access
339                  */
340                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
341                          mr_access_flags);
342                 ib_mr = ERR_PTR(-EINVAL);
343                 goto reg_user_mr_exit0;
344         }
345
346         if (length == 0 || virt + length < virt) {
347                 ehca_err(pd->device, "bad input values: length=%llx "
348                          "virt_base=%llx", length, virt);
349                 ib_mr = ERR_PTR(-EINVAL);
350                 goto reg_user_mr_exit0;
351         }
352
353         e_mr = ehca_mr_new();
354         if (!e_mr) {
355                 ehca_err(pd->device, "out of memory");
356                 ib_mr = ERR_PTR(-ENOMEM);
357                 goto reg_user_mr_exit0;
358         }
359
360         e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
361                                  mr_access_flags, 0);
362         if (IS_ERR(e_mr->umem)) {
363                 ib_mr = (void *)e_mr->umem;
364                 goto reg_user_mr_exit1;
365         }
366
367         if (e_mr->umem->page_size != PAGE_SIZE) {
368                 ehca_err(pd->device, "page size not supported, "
369                          "e_mr->umem->page_size=%x", e_mr->umem->page_size);
370                 ib_mr = ERR_PTR(-EINVAL);
371                 goto reg_user_mr_exit2;
372         }
373
374         /* determine number of MR pages */
375         num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
376         /* select proper hw_pgsize */
377         page_shift = PAGE_SHIFT;
378         if (e_mr->umem->hugetlb) {
379                 /* determine page_shift, clamp between 4K and 16M */
380                 page_shift = (fls64(length - 1) + 3) & ~3;
381                 page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
382                                  EHCA_MR_PGSHIFT16M);
383         }
384         hwpage_size = 1UL << page_shift;
385
386         /* now that we have the desired page size, shift until it's
387          * supported, too. 4K is always supported, so this terminates.
388          */
389         while (!(hwpage_size & shca->hca_cap_mr_pgsize))
390                 hwpage_size >>= 4;
391
392 reg_user_mr_fallback:
393         num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
394         /* register MR on HCA */
395         memset(&pginfo, 0, sizeof(pginfo));
396         pginfo.type = EHCA_MR_PGI_USER;
397         pginfo.hwpage_size = hwpage_size;
398         pginfo.num_kpages = num_kpages;
399         pginfo.num_hwpages = num_hwpages;
400         pginfo.u.usr.region = e_mr->umem;
401         pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
402         pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
403                                                      (&e_mr->umem->chunk_list),
404                                                      list);
405
406         ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
407                           e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
408                           &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
409         if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
410                 ehca_warn(pd->device, "failed to register mr "
411                           "with hwpage_size=%llx", hwpage_size);
412                 ehca_info(pd->device, "try to register mr with "
413                           "kpage_size=%lx", PAGE_SIZE);
414                 /*
415                  * this means kpages are not contiguous for a hw page
416                  * try kernel page size as fallback solution
417                  */
418                 hwpage_size = PAGE_SIZE;
419                 goto reg_user_mr_fallback;
420         }
421         if (ret) {
422                 ib_mr = ERR_PTR(ret);
423                 goto reg_user_mr_exit2;
424         }
425
426         /* successful registration of all pages */
427         return &e_mr->ib.ib_mr;
428
429 reg_user_mr_exit2:
430         ib_umem_release(e_mr->umem);
431 reg_user_mr_exit1:
432         ehca_mr_delete(e_mr);
433 reg_user_mr_exit0:
434         if (IS_ERR(ib_mr))
435                 ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
436                          PTR_ERR(ib_mr), pd, mr_access_flags, udata);
437         return ib_mr;
438 } /* end ehca_reg_user_mr() */
439
440 /*----------------------------------------------------------------------*/
441
442 int ehca_rereg_phys_mr(struct ib_mr *mr,
443                        int mr_rereg_mask,
444                        struct ib_pd *pd,
445                        struct ib_phys_buf *phys_buf_array,
446                        int num_phys_buf,
447                        int mr_access_flags,
448                        u64 *iova_start)
449 {
450         int ret;
451
452         struct ehca_shca *shca =
453                 container_of(mr->device, struct ehca_shca, ib_device);
454         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
455         u64 new_size;
456         u64 *new_start;
457         u32 new_acl;
458         struct ehca_pd *new_pd;
459         u32 tmp_lkey, tmp_rkey;
460         unsigned long sl_flags;
461         u32 num_kpages = 0;
462         u32 num_hwpages = 0;
463         struct ehca_mr_pginfo pginfo;
464
465         if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
466                 /* TODO not supported, because PHYP rereg hCall needs pages */
467                 ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
468                          "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
469                 ret = -EINVAL;
470                 goto rereg_phys_mr_exit0;
471         }
472
473         if (mr_rereg_mask & IB_MR_REREG_PD) {
474                 if (!pd) {
475                         ehca_err(mr->device, "rereg with bad pd, pd=%p "
476                                  "mr_rereg_mask=%x", pd, mr_rereg_mask);
477                         ret = -EINVAL;
478                         goto rereg_phys_mr_exit0;
479                 }
480         }
481
482         if ((mr_rereg_mask &
483              ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
484             (mr_rereg_mask == 0)) {
485                 ret = -EINVAL;
486                 goto rereg_phys_mr_exit0;
487         }
488
489         /* check other parameters */
490         if (e_mr == shca->maxmr) {
491                 /* should be impossible, however reject to be sure */
492                 ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
493                          "shca->maxmr=%p mr->lkey=%x",
494                          mr, shca->maxmr, mr->lkey);
495                 ret = -EINVAL;
496                 goto rereg_phys_mr_exit0;
497         }
498         if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
499                 if (e_mr->flags & EHCA_MR_FLAG_FMR) {
500                         ehca_err(mr->device, "not supported for FMR, mr=%p "
501                                  "flags=%x", mr, e_mr->flags);
502                         ret = -EINVAL;
503                         goto rereg_phys_mr_exit0;
504                 }
505                 if (!phys_buf_array || num_phys_buf <= 0) {
506                         ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
507                                  " phys_buf_array=%p num_phys_buf=%x",
508                                  mr_rereg_mask, phys_buf_array, num_phys_buf);
509                         ret = -EINVAL;
510                         goto rereg_phys_mr_exit0;
511                 }
512         }
513         if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
514             (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
515               !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
516              ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
517               !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
518                 /*
519                  * Remote Write Access requires Local Write Access
520                  * Remote Atomic Access requires Local Write Access
521                  */
522                 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
523                          "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
524                 ret = -EINVAL;
525                 goto rereg_phys_mr_exit0;
526         }
527
528         /* set requested values dependent on rereg request */
529         spin_lock_irqsave(&e_mr->mrlock, sl_flags);
530         new_start = e_mr->start;
531         new_size = e_mr->size;
532         new_acl = e_mr->acl;
533         new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
534
535         if (mr_rereg_mask & IB_MR_REREG_TRANS) {
536                 u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
537
538                 new_start = iova_start; /* change address */
539                 /* check physical buffer list and calculate size */
540                 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
541                                                     num_phys_buf, iova_start,
542                                                     &new_size);
543                 if (ret)
544                         goto rereg_phys_mr_exit1;
545                 if ((new_size == 0) ||
546                     (((u64)iova_start + new_size) < (u64)iova_start)) {
547                         ehca_err(mr->device, "bad input values: new_size=%llx "
548                                  "iova_start=%p", new_size, iova_start);
549                         ret = -EINVAL;
550                         goto rereg_phys_mr_exit1;
551                 }
552                 num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
553                                         new_size, PAGE_SIZE);
554                 num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
555                                          new_size, hw_pgsize);
556                 memset(&pginfo, 0, sizeof(pginfo));
557                 pginfo.type = EHCA_MR_PGI_PHYS;
558                 pginfo.num_kpages = num_kpages;
559                 pginfo.hwpage_size = hw_pgsize;
560                 pginfo.num_hwpages = num_hwpages;
561                 pginfo.u.phy.num_phys_buf = num_phys_buf;
562                 pginfo.u.phy.phys_buf_array = phys_buf_array;
563                 pginfo.next_hwpage =
564                         ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
565         }
566         if (mr_rereg_mask & IB_MR_REREG_ACCESS)
567                 new_acl = mr_access_flags;
568         if (mr_rereg_mask & IB_MR_REREG_PD)
569                 new_pd = container_of(pd, struct ehca_pd, ib_pd);
570
571         ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
572                             new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
573         if (ret)
574                 goto rereg_phys_mr_exit1;
575
576         /* successful reregistration */
577         if (mr_rereg_mask & IB_MR_REREG_PD)
578                 mr->pd = pd;
579         mr->lkey = tmp_lkey;
580         mr->rkey = tmp_rkey;
581
582 rereg_phys_mr_exit1:
583         spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
584 rereg_phys_mr_exit0:
585         if (ret)
586                 ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
587                          "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
588                          "iova_start=%p",
589                          ret, mr, mr_rereg_mask, pd, phys_buf_array,
590                          num_phys_buf, mr_access_flags, iova_start);
591         return ret;
592 } /* end ehca_rereg_phys_mr() */
593
594 /*----------------------------------------------------------------------*/
595
596 int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
597 {
598         int ret = 0;
599         u64 h_ret;
600         struct ehca_shca *shca =
601                 container_of(mr->device, struct ehca_shca, ib_device);
602         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
603         unsigned long sl_flags;
604         struct ehca_mr_hipzout_parms hipzout;
605
606         if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
607                 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
608                          "e_mr->flags=%x", mr, e_mr, e_mr->flags);
609                 ret = -EINVAL;
610                 goto query_mr_exit0;
611         }
612
613         memset(mr_attr, 0, sizeof(struct ib_mr_attr));
614         spin_lock_irqsave(&e_mr->mrlock, sl_flags);
615
616         h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
617         if (h_ret != H_SUCCESS) {
618                 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
619                          "hca_hndl=%llx mr_hndl=%llx lkey=%x",
620                          h_ret, mr, shca->ipz_hca_handle.handle,
621                          e_mr->ipz_mr_handle.handle, mr->lkey);
622                 ret = ehca2ib_return_code(h_ret);
623                 goto query_mr_exit1;
624         }
625         mr_attr->pd = mr->pd;
626         mr_attr->device_virt_addr = hipzout.vaddr;
627         mr_attr->size = hipzout.len;
628         mr_attr->lkey = hipzout.lkey;
629         mr_attr->rkey = hipzout.rkey;
630         ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
631
632 query_mr_exit1:
633         spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
634 query_mr_exit0:
635         if (ret)
636                 ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
637                          ret, mr, mr_attr);
638         return ret;
639 } /* end ehca_query_mr() */
640
641 /*----------------------------------------------------------------------*/
642
643 int ehca_dereg_mr(struct ib_mr *mr)
644 {
645         int ret = 0;
646         u64 h_ret;
647         struct ehca_shca *shca =
648                 container_of(mr->device, struct ehca_shca, ib_device);
649         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
650
651         if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
652                 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
653                          "e_mr->flags=%x", mr, e_mr, e_mr->flags);
654                 ret = -EINVAL;
655                 goto dereg_mr_exit0;
656         } else if (e_mr == shca->maxmr) {
657                 /* should be impossible, however reject to be sure */
658                 ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
659                          "shca->maxmr=%p mr->lkey=%x",
660                          mr, shca->maxmr, mr->lkey);
661                 ret = -EINVAL;
662                 goto dereg_mr_exit0;
663         }
664
665         /* TODO: BUSY: MR still has bound window(s) */
666         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
667         if (h_ret != H_SUCCESS) {
668                 ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
669                          "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
670                          h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
671                          e_mr->ipz_mr_handle.handle, mr->lkey);
672                 ret = ehca2ib_return_code(h_ret);
673                 goto dereg_mr_exit0;
674         }
675
676         if (e_mr->umem)
677                 ib_umem_release(e_mr->umem);
678
679         /* successful deregistration */
680         ehca_mr_delete(e_mr);
681
682 dereg_mr_exit0:
683         if (ret)
684                 ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
685         return ret;
686 } /* end ehca_dereg_mr() */
687
688 /*----------------------------------------------------------------------*/
689
690 struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
691 {
692         struct ib_mw *ib_mw;
693         u64 h_ret;
694         struct ehca_mw *e_mw;
695         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
696         struct ehca_shca *shca =
697                 container_of(pd->device, struct ehca_shca, ib_device);
698         struct ehca_mw_hipzout_parms hipzout;
699
700         e_mw = ehca_mw_new();
701         if (!e_mw) {
702                 ib_mw = ERR_PTR(-ENOMEM);
703                 goto alloc_mw_exit0;
704         }
705
706         h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
707                                          e_pd->fw_pd, &hipzout);
708         if (h_ret != H_SUCCESS) {
709                 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
710                          "shca=%p hca_hndl=%llx mw=%p",
711                          h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
712                 ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
713                 goto alloc_mw_exit1;
714         }
715         /* successful MW allocation */
716         e_mw->ipz_mw_handle = hipzout.handle;
717         e_mw->ib_mw.rkey    = hipzout.rkey;
718         return &e_mw->ib_mw;
719
720 alloc_mw_exit1:
721         ehca_mw_delete(e_mw);
722 alloc_mw_exit0:
723         if (IS_ERR(ib_mw))
724                 ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
725         return ib_mw;
726 } /* end ehca_alloc_mw() */
727
728 /*----------------------------------------------------------------------*/
729
730 int ehca_bind_mw(struct ib_qp *qp,
731                  struct ib_mw *mw,
732                  struct ib_mw_bind *mw_bind)
733 {
734         /* TODO: not supported up to now */
735         ehca_gen_err("bind MW currently not supported by HCAD");
736
737         return -EPERM;
738 } /* end ehca_bind_mw() */
739
740 /*----------------------------------------------------------------------*/
741
742 int ehca_dealloc_mw(struct ib_mw *mw)
743 {
744         u64 h_ret;
745         struct ehca_shca *shca =
746                 container_of(mw->device, struct ehca_shca, ib_device);
747         struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
748
749         h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
750         if (h_ret != H_SUCCESS) {
751                 ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
752                          "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
753                          h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
754                          e_mw->ipz_mw_handle.handle);
755                 return ehca2ib_return_code(h_ret);
756         }
757         /* successful deallocation */
758         ehca_mw_delete(e_mw);
759         return 0;
760 } /* end ehca_dealloc_mw() */
761
762 /*----------------------------------------------------------------------*/
763
764 struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
765                               int mr_access_flags,
766                               struct ib_fmr_attr *fmr_attr)
767 {
768         struct ib_fmr *ib_fmr;
769         struct ehca_shca *shca =
770                 container_of(pd->device, struct ehca_shca, ib_device);
771         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
772         struct ehca_mr *e_fmr;
773         int ret;
774         u32 tmp_lkey, tmp_rkey;
775         struct ehca_mr_pginfo pginfo;
776         u64 hw_pgsize;
777
778         /* check other parameters */
779         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
780              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
781             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
782              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
783                 /*
784                  * Remote Write Access requires Local Write Access
785                  * Remote Atomic Access requires Local Write Access
786                  */
787                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
788                          mr_access_flags);
789                 ib_fmr = ERR_PTR(-EINVAL);
790                 goto alloc_fmr_exit0;
791         }
792         if (mr_access_flags & IB_ACCESS_MW_BIND) {
793                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
794                          mr_access_flags);
795                 ib_fmr = ERR_PTR(-EINVAL);
796                 goto alloc_fmr_exit0;
797         }
798         if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
799                 ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
800                          "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
801                          fmr_attr->max_pages, fmr_attr->max_maps,
802                          fmr_attr->page_shift);
803                 ib_fmr = ERR_PTR(-EINVAL);
804                 goto alloc_fmr_exit0;
805         }
806
807         hw_pgsize = 1 << fmr_attr->page_shift;
808         if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
809                 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
810                          fmr_attr->page_shift);
811                 ib_fmr = ERR_PTR(-EINVAL);
812                 goto alloc_fmr_exit0;
813         }
814
815         e_fmr = ehca_mr_new();
816         if (!e_fmr) {
817                 ib_fmr = ERR_PTR(-ENOMEM);
818                 goto alloc_fmr_exit0;
819         }
820         e_fmr->flags |= EHCA_MR_FLAG_FMR;
821
822         /* register MR on HCA */
823         memset(&pginfo, 0, sizeof(pginfo));
824         pginfo.hwpage_size = hw_pgsize;
825         /*
826          * pginfo.num_hwpages==0, ie register_rpages() will not be called
827          * but deferred to map_phys_fmr()
828          */
829         ret = ehca_reg_mr(shca, e_fmr, NULL,
830                           fmr_attr->max_pages * (1 << fmr_attr->page_shift),
831                           mr_access_flags, e_pd, &pginfo,
832                           &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
833         if (ret) {
834                 ib_fmr = ERR_PTR(ret);
835                 goto alloc_fmr_exit1;
836         }
837
838         /* successful */
839         e_fmr->hwpage_size = hw_pgsize;
840         e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
841         e_fmr->fmr_max_pages = fmr_attr->max_pages;
842         e_fmr->fmr_max_maps = fmr_attr->max_maps;
843         e_fmr->fmr_map_cnt = 0;
844         return &e_fmr->ib.ib_fmr;
845
846 alloc_fmr_exit1:
847         ehca_mr_delete(e_fmr);
848 alloc_fmr_exit0:
849         return ib_fmr;
850 } /* end ehca_alloc_fmr() */
851
852 /*----------------------------------------------------------------------*/
853
854 int ehca_map_phys_fmr(struct ib_fmr *fmr,
855                       u64 *page_list,
856                       int list_len,
857                       u64 iova)
858 {
859         int ret;
860         struct ehca_shca *shca =
861                 container_of(fmr->device, struct ehca_shca, ib_device);
862         struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
863         struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
864         struct ehca_mr_pginfo pginfo;
865         u32 tmp_lkey, tmp_rkey;
866
867         if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
868                 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
869                          e_fmr, e_fmr->flags);
870                 ret = -EINVAL;
871                 goto map_phys_fmr_exit0;
872         }
873         ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
874         if (ret)
875                 goto map_phys_fmr_exit0;
876         if (iova % e_fmr->fmr_page_size) {
877                 /* only whole-numbered pages */
878                 ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
879                          iova, e_fmr->fmr_page_size);
880                 ret = -EINVAL;
881                 goto map_phys_fmr_exit0;
882         }
883         if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
884                 /* HCAD does not limit the maps, however trace this anyway */
885                 ehca_info(fmr->device, "map limit exceeded, fmr=%p "
886                           "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
887                           fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
888         }
889
890         memset(&pginfo, 0, sizeof(pginfo));
891         pginfo.type = EHCA_MR_PGI_FMR;
892         pginfo.num_kpages = list_len;
893         pginfo.hwpage_size = e_fmr->hwpage_size;
894         pginfo.num_hwpages =
895                 list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
896         pginfo.u.fmr.page_list = page_list;
897         pginfo.next_hwpage =
898                 (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
899         pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
900
901         ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
902                             list_len * e_fmr->fmr_page_size,
903                             e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
904         if (ret)
905                 goto map_phys_fmr_exit0;
906
907         /* successful reregistration */
908         e_fmr->fmr_map_cnt++;
909         e_fmr->ib.ib_fmr.lkey = tmp_lkey;
910         e_fmr->ib.ib_fmr.rkey = tmp_rkey;
911         return 0;
912
913 map_phys_fmr_exit0:
914         if (ret)
915                 ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
916                          "iova=%llx", ret, fmr, page_list, list_len, iova);
917         return ret;
918 } /* end ehca_map_phys_fmr() */
919
920 /*----------------------------------------------------------------------*/
921
922 int ehca_unmap_fmr(struct list_head *fmr_list)
923 {
924         int ret = 0;
925         struct ib_fmr *ib_fmr;
926         struct ehca_shca *shca = NULL;
927         struct ehca_shca *prev_shca;
928         struct ehca_mr *e_fmr;
929         u32 num_fmr = 0;
930         u32 unmap_fmr_cnt = 0;
931
932         /* check all FMR belong to same SHCA, and check internal flag */
933         list_for_each_entry(ib_fmr, fmr_list, list) {
934                 prev_shca = shca;
935                 if (!ib_fmr) {
936                         ehca_gen_err("bad fmr=%p in list", ib_fmr);
937                         ret = -EINVAL;
938                         goto unmap_fmr_exit0;
939                 }
940                 shca = container_of(ib_fmr->device, struct ehca_shca,
941                                     ib_device);
942                 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
943                 if ((shca != prev_shca) && prev_shca) {
944                         ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
945                                  "prev_shca=%p e_fmr=%p",
946                                  shca, prev_shca, e_fmr);
947                         ret = -EINVAL;
948                         goto unmap_fmr_exit0;
949                 }
950                 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
951                         ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
952                                  "e_fmr->flags=%x", e_fmr, e_fmr->flags);
953                         ret = -EINVAL;
954                         goto unmap_fmr_exit0;
955                 }
956                 num_fmr++;
957         }
958
959         /* loop over all FMRs to unmap */
960         list_for_each_entry(ib_fmr, fmr_list, list) {
961                 unmap_fmr_cnt++;
962                 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
963                 shca = container_of(ib_fmr->device, struct ehca_shca,
964                                     ib_device);
965                 ret = ehca_unmap_one_fmr(shca, e_fmr);
966                 if (ret) {
967                         /* unmap failed, stop unmapping of rest of FMRs */
968                         ehca_err(&shca->ib_device, "unmap of one FMR failed, "
969                                  "stop rest, e_fmr=%p num_fmr=%x "
970                                  "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
971                                  unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
972                         goto unmap_fmr_exit0;
973                 }
974         }
975
976 unmap_fmr_exit0:
977         if (ret)
978                 ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
979                              ret, fmr_list, num_fmr, unmap_fmr_cnt);
980         return ret;
981 } /* end ehca_unmap_fmr() */
982
983 /*----------------------------------------------------------------------*/
984
985 int ehca_dealloc_fmr(struct ib_fmr *fmr)
986 {
987         int ret;
988         u64 h_ret;
989         struct ehca_shca *shca =
990                 container_of(fmr->device, struct ehca_shca, ib_device);
991         struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
992
993         if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
994                 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
995                          e_fmr, e_fmr->flags);
996                 ret = -EINVAL;
997                 goto free_fmr_exit0;
998         }
999
1000         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1001         if (h_ret != H_SUCCESS) {
1002                 ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
1003                          "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
1004                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1005                          e_fmr->ipz_mr_handle.handle, fmr->lkey);
1006                 ret = ehca2ib_return_code(h_ret);
1007                 goto free_fmr_exit0;
1008         }
1009         /* successful deregistration */
1010         ehca_mr_delete(e_fmr);
1011         return 0;
1012
1013 free_fmr_exit0:
1014         if (ret)
1015                 ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
1016         return ret;
1017 } /* end ehca_dealloc_fmr() */
1018
1019 /*----------------------------------------------------------------------*/
1020
1021 static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
1022                                    struct ehca_mr *e_mr,
1023                                    struct ehca_mr_pginfo *pginfo);
1024
1025 int ehca_reg_mr(struct ehca_shca *shca,
1026                 struct ehca_mr *e_mr,
1027                 u64 *iova_start,
1028                 u64 size,
1029                 int acl,
1030                 struct ehca_pd *e_pd,
1031                 struct ehca_mr_pginfo *pginfo,
1032                 u32 *lkey, /*OUT*/
1033                 u32 *rkey, /*OUT*/
1034                 enum ehca_reg_type reg_type)
1035 {
1036         int ret;
1037         u64 h_ret;
1038         u32 hipz_acl;
1039         struct ehca_mr_hipzout_parms hipzout;
1040
1041         ehca_mrmw_map_acl(acl, &hipz_acl);
1042         ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1043         if (ehca_use_hp_mr == 1)
1044                 hipz_acl |= 0x00000001;
1045
1046         h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
1047                                          (u64)iova_start, size, hipz_acl,
1048                                          e_pd->fw_pd, &hipzout);
1049         if (h_ret != H_SUCCESS) {
1050                 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
1051                          "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
1052                 ret = ehca2ib_return_code(h_ret);
1053                 goto ehca_reg_mr_exit0;
1054         }
1055
1056         e_mr->ipz_mr_handle = hipzout.handle;
1057
1058         if (reg_type == EHCA_REG_BUSMAP_MR)
1059                 ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
1060         else if (reg_type == EHCA_REG_MR)
1061                 ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
1062         else
1063                 ret = -EINVAL;
1064
1065         if (ret)
1066                 goto ehca_reg_mr_exit1;
1067
1068         /* successful registration */
1069         e_mr->num_kpages = pginfo->num_kpages;
1070         e_mr->num_hwpages = pginfo->num_hwpages;
1071         e_mr->hwpage_size = pginfo->hwpage_size;
1072         e_mr->start = iova_start;
1073         e_mr->size = size;
1074         e_mr->acl = acl;
1075         *lkey = hipzout.lkey;
1076         *rkey = hipzout.rkey;
1077         return 0;
1078
1079 ehca_reg_mr_exit1:
1080         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1081         if (h_ret != H_SUCCESS) {
1082                 ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
1083                          "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
1084                          "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
1085                          h_ret, shca, e_mr, iova_start, size, acl, e_pd,
1086                          hipzout.lkey, pginfo, pginfo->num_kpages,
1087                          pginfo->num_hwpages, ret);
1088                 ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
1089                          "not recoverable");
1090         }
1091 ehca_reg_mr_exit0:
1092         if (ret)
1093                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1094                          "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1095                          "num_kpages=%llx num_hwpages=%llx",
1096                          ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
1097                          pginfo->num_kpages, pginfo->num_hwpages);
1098         return ret;
1099 } /* end ehca_reg_mr() */
1100
1101 /*----------------------------------------------------------------------*/
1102
1103 int ehca_reg_mr_rpages(struct ehca_shca *shca,
1104                        struct ehca_mr *e_mr,
1105                        struct ehca_mr_pginfo *pginfo)
1106 {
1107         int ret = 0;
1108         u64 h_ret;
1109         u32 rnum;
1110         u64 rpage;
1111         u32 i;
1112         u64 *kpage;
1113
1114         if (!pginfo->num_hwpages) /* in case of fmr */
1115                 return 0;
1116
1117         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1118         if (!kpage) {
1119                 ehca_err(&shca->ib_device, "kpage alloc failed");
1120                 ret = -ENOMEM;
1121                 goto ehca_reg_mr_rpages_exit0;
1122         }
1123
1124         /* max MAX_RPAGES ehca mr pages per register call */
1125         for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
1126
1127                 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1128                         rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
1129                         if (rnum == 0)
1130                                 rnum = MAX_RPAGES;      /* last shot is full */
1131                 } else
1132                         rnum = MAX_RPAGES;
1133
1134                 ret = ehca_set_pagebuf(pginfo, rnum, kpage);
1135                 if (ret) {
1136                         ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1137                                  "bad rc, ret=%i rnum=%x kpage=%p",
1138                                  ret, rnum, kpage);
1139                         goto ehca_reg_mr_rpages_exit1;
1140                 }
1141
1142                 if (rnum > 1) {
1143                         rpage = virt_to_abs(kpage);
1144                         if (!rpage) {
1145                                 ehca_err(&shca->ib_device, "kpage=%p i=%x",
1146                                          kpage, i);
1147                                 ret = -EFAULT;
1148                                 goto ehca_reg_mr_rpages_exit1;
1149                         }
1150                 } else
1151                         rpage = *kpage;
1152
1153                 h_ret = hipz_h_register_rpage_mr(
1154                         shca->ipz_hca_handle, e_mr,
1155                         ehca_encode_hwpage_size(pginfo->hwpage_size),
1156                         0, rpage, rnum);
1157
1158                 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1159                         /*
1160                          * check for 'registration complete'==H_SUCCESS
1161                          * and for 'page registered'==H_PAGE_REGISTERED
1162                          */
1163                         if (h_ret != H_SUCCESS) {
1164                                 ehca_err(&shca->ib_device, "last "
1165                                          "hipz_reg_rpage_mr failed, h_ret=%lli "
1166                                          "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
1167                                          " lkey=%x", h_ret, e_mr, i,
1168                                          shca->ipz_hca_handle.handle,
1169                                          e_mr->ipz_mr_handle.handle,
1170                                          e_mr->ib.ib_mr.lkey);
1171                                 ret = ehca2ib_return_code(h_ret);
1172                                 break;
1173                         } else
1174                                 ret = 0;
1175                 } else if (h_ret != H_PAGE_REGISTERED) {
1176                         ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
1177                                  "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
1178                                  "mr_hndl=%llx", h_ret, e_mr, i,
1179                                  e_mr->ib.ib_mr.lkey,
1180                                  shca->ipz_hca_handle.handle,
1181                                  e_mr->ipz_mr_handle.handle);
1182                         ret = ehca2ib_return_code(h_ret);
1183                         break;
1184                 } else
1185                         ret = 0;
1186         } /* end for(i) */
1187
1188
1189 ehca_reg_mr_rpages_exit1:
1190         ehca_free_fw_ctrlblock(kpage);
1191 ehca_reg_mr_rpages_exit0:
1192         if (ret)
1193                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
1194                          "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
1195                          pginfo, pginfo->num_kpages, pginfo->num_hwpages);
1196         return ret;
1197 } /* end ehca_reg_mr_rpages() */
1198
1199 /*----------------------------------------------------------------------*/
1200
1201 inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1202                                 struct ehca_mr *e_mr,
1203                                 u64 *iova_start,
1204                                 u64 size,
1205                                 u32 acl,
1206                                 struct ehca_pd *e_pd,
1207                                 struct ehca_mr_pginfo *pginfo,
1208                                 u32 *lkey, /*OUT*/
1209                                 u32 *rkey) /*OUT*/
1210 {
1211         int ret;
1212         u64 h_ret;
1213         u32 hipz_acl;
1214         u64 *kpage;
1215         u64 rpage;
1216         struct ehca_mr_pginfo pginfo_save;
1217         struct ehca_mr_hipzout_parms hipzout;
1218
1219         ehca_mrmw_map_acl(acl, &hipz_acl);
1220         ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1221
1222         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1223         if (!kpage) {
1224                 ehca_err(&shca->ib_device, "kpage alloc failed");
1225                 ret = -ENOMEM;
1226                 goto ehca_rereg_mr_rereg1_exit0;
1227         }
1228
1229         pginfo_save = *pginfo;
1230         ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
1231         if (ret) {
1232                 ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
1233                          "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
1234                          "kpage=%p", e_mr, pginfo, pginfo->type,
1235                          pginfo->num_kpages, pginfo->num_hwpages, kpage);
1236                 goto ehca_rereg_mr_rereg1_exit1;
1237         }
1238         rpage = virt_to_abs(kpage);
1239         if (!rpage) {
1240                 ehca_err(&shca->ib_device, "kpage=%p", kpage);
1241                 ret = -EFAULT;
1242                 goto ehca_rereg_mr_rereg1_exit1;
1243         }
1244         h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
1245                                       (u64)iova_start, size, hipz_acl,
1246                                       e_pd->fw_pd, rpage, &hipzout);
1247         if (h_ret != H_SUCCESS) {
1248                 /*
1249                  * reregistration unsuccessful, try it again with the 3 hCalls,
1250                  * e.g. this is required in case H_MR_CONDITION
1251                  * (MW bound or MR is shared)
1252                  */
1253                 ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
1254                           "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
1255                 *pginfo = pginfo_save;
1256                 ret = -EAGAIN;
1257         } else if ((u64 *)hipzout.vaddr != iova_start) {
1258                 ehca_err(&shca->ib_device, "PHYP changed iova_start in "
1259                          "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
1260                          "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
1261                          hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
1262                          e_mr->ib.ib_mr.lkey, hipzout.lkey);
1263                 ret = -EFAULT;
1264         } else {
1265                 /*
1266                  * successful reregistration
1267                  * note: start and start_out are identical for eServer HCAs
1268                  */
1269                 e_mr->num_kpages = pginfo->num_kpages;
1270                 e_mr->num_hwpages = pginfo->num_hwpages;
1271                 e_mr->hwpage_size = pginfo->hwpage_size;
1272                 e_mr->start = iova_start;
1273                 e_mr->size = size;
1274                 e_mr->acl = acl;
1275                 *lkey = hipzout.lkey;
1276                 *rkey = hipzout.rkey;
1277         }
1278
1279 ehca_rereg_mr_rereg1_exit1:
1280         ehca_free_fw_ctrlblock(kpage);
1281 ehca_rereg_mr_rereg1_exit0:
1282         if ( ret && (ret != -EAGAIN) )
1283                 ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
1284                          "pginfo=%p num_kpages=%llx num_hwpages=%llx",
1285                          ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
1286                          pginfo->num_hwpages);
1287         return ret;
1288 } /* end ehca_rereg_mr_rereg1() */
1289
1290 /*----------------------------------------------------------------------*/
1291
1292 int ehca_rereg_mr(struct ehca_shca *shca,
1293                   struct ehca_mr *e_mr,
1294                   u64 *iova_start,
1295                   u64 size,
1296                   int acl,
1297                   struct ehca_pd *e_pd,
1298                   struct ehca_mr_pginfo *pginfo,
1299                   u32 *lkey,
1300                   u32 *rkey)
1301 {
1302         int ret = 0;
1303         u64 h_ret;
1304         int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
1305         int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
1306
1307         /* first determine reregistration hCall(s) */
1308         if ((pginfo->num_hwpages > MAX_RPAGES) ||
1309             (e_mr->num_hwpages > MAX_RPAGES) ||
1310             (pginfo->num_hwpages > e_mr->num_hwpages)) {
1311                 ehca_dbg(&shca->ib_device, "Rereg3 case, "
1312                          "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
1313                          pginfo->num_hwpages, e_mr->num_hwpages);
1314                 rereg_1_hcall = 0;
1315                 rereg_3_hcall = 1;
1316         }
1317
1318         if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
1319                 rereg_1_hcall = 0;
1320                 rereg_3_hcall = 1;
1321                 e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
1322                 ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
1323                          e_mr);
1324         }
1325
1326         if (rereg_1_hcall) {
1327                 ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
1328                                            acl, e_pd, pginfo, lkey, rkey);
1329                 if (ret) {
1330                         if (ret == -EAGAIN)
1331                                 rereg_3_hcall = 1;
1332                         else
1333                                 goto ehca_rereg_mr_exit0;
1334                 }
1335         }
1336
1337         if (rereg_3_hcall) {
1338                 struct ehca_mr save_mr;
1339
1340                 /* first deregister old MR */
1341                 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1342                 if (h_ret != H_SUCCESS) {
1343                         ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1344                                  "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
1345                                  "mr->lkey=%x",
1346                                  h_ret, e_mr, shca->ipz_hca_handle.handle,
1347                                  e_mr->ipz_mr_handle.handle,
1348                                  e_mr->ib.ib_mr.lkey);
1349                         ret = ehca2ib_return_code(h_ret);
1350                         goto ehca_rereg_mr_exit0;
1351                 }
1352                 /* clean ehca_mr_t, without changing struct ib_mr and lock */
1353                 save_mr = *e_mr;
1354                 ehca_mr_deletenew(e_mr);
1355
1356                 /* set some MR values */
1357                 e_mr->flags = save_mr.flags;
1358                 e_mr->hwpage_size = save_mr.hwpage_size;
1359                 e_mr->fmr_page_size = save_mr.fmr_page_size;
1360                 e_mr->fmr_max_pages = save_mr.fmr_max_pages;
1361                 e_mr->fmr_max_maps = save_mr.fmr_max_maps;
1362                 e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
1363
1364                 ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
1365                                   e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
1366                 if (ret) {
1367                         u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
1368                         memcpy(&e_mr->flags, &(save_mr.flags),
1369                                sizeof(struct ehca_mr) - offset);
1370                         goto ehca_rereg_mr_exit0;
1371                 }
1372         }
1373
1374 ehca_rereg_mr_exit0:
1375         if (ret)
1376                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1377                          "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1378                          "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
1379                          "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
1380                          acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
1381                          rereg_1_hcall, rereg_3_hcall);
1382         return ret;
1383 } /* end ehca_rereg_mr() */
1384
1385 /*----------------------------------------------------------------------*/
1386
1387 int ehca_unmap_one_fmr(struct ehca_shca *shca,
1388                        struct ehca_mr *e_fmr)
1389 {
1390         int ret = 0;
1391         u64 h_ret;
1392         struct ehca_pd *e_pd =
1393                 container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
1394         struct ehca_mr save_fmr;
1395         u32 tmp_lkey, tmp_rkey;
1396         struct ehca_mr_pginfo pginfo;
1397         struct ehca_mr_hipzout_parms hipzout;
1398         struct ehca_mr save_mr;
1399
1400         if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
1401                 /*
1402                  * note: after using rereg hcall with len=0,
1403                  * rereg hcall must be used again for registering pages
1404                  */
1405                 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
1406                                               0, 0, e_pd->fw_pd, 0, &hipzout);
1407                 if (h_ret == H_SUCCESS) {
1408                         /* successful reregistration */
1409                         e_fmr->start = NULL;
1410                         e_fmr->size = 0;
1411                         tmp_lkey = hipzout.lkey;
1412                         tmp_rkey = hipzout.rkey;
1413                         return 0;
1414                 }
1415                 /*
1416                  * should not happen, because length checked above,
1417                  * FMRs are not shared and no MW bound to FMRs
1418                  */
1419                 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1420                          "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
1421                          "mr_hndl=%llx lkey=%x lkey_out=%x",
1422                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1423                          e_fmr->ipz_mr_handle.handle,
1424                          e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1425                 /* try free and rereg */
1426         }
1427
1428         /* first free old FMR */
1429         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1430         if (h_ret != H_SUCCESS) {
1431                 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1432                          "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
1433                          "lkey=%x",
1434                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1435                          e_fmr->ipz_mr_handle.handle,
1436                          e_fmr->ib.ib_fmr.lkey);
1437                 ret = ehca2ib_return_code(h_ret);
1438                 goto ehca_unmap_one_fmr_exit0;
1439         }
1440         /* clean ehca_mr_t, without changing lock */
1441         save_fmr = *e_fmr;
1442         ehca_mr_deletenew(e_fmr);
1443
1444         /* set some MR values */
1445         e_fmr->flags = save_fmr.flags;
1446         e_fmr->hwpage_size = save_fmr.hwpage_size;
1447         e_fmr->fmr_page_size = save_fmr.fmr_page_size;
1448         e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
1449         e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
1450         e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
1451         e_fmr->acl = save_fmr.acl;
1452
1453         memset(&pginfo, 0, sizeof(pginfo));
1454         pginfo.type = EHCA_MR_PGI_FMR;
1455         ret = ehca_reg_mr(shca, e_fmr, NULL,
1456                           (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
1457                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
1458                           &tmp_rkey, EHCA_REG_MR);
1459         if (ret) {
1460                 u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
1461                 memcpy(&e_fmr->flags, &(save_mr.flags),
1462                        sizeof(struct ehca_mr) - offset);
1463         }
1464
1465 ehca_unmap_one_fmr_exit0:
1466         if (ret)
1467                 ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
1468                          "fmr_max_pages=%x",
1469                          ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
1470         return ret;
1471 } /* end ehca_unmap_one_fmr() */
1472
1473 /*----------------------------------------------------------------------*/
1474
1475 int ehca_reg_smr(struct ehca_shca *shca,
1476                  struct ehca_mr *e_origmr,
1477                  struct ehca_mr *e_newmr,
1478                  u64 *iova_start,
1479                  int acl,
1480                  struct ehca_pd *e_pd,
1481                  u32 *lkey, /*OUT*/
1482                  u32 *rkey) /*OUT*/
1483 {
1484         int ret = 0;
1485         u64 h_ret;
1486         u32 hipz_acl;
1487         struct ehca_mr_hipzout_parms hipzout;
1488
1489         ehca_mrmw_map_acl(acl, &hipz_acl);
1490         ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1491
1492         h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1493                                     (u64)iova_start, hipz_acl, e_pd->fw_pd,
1494                                     &hipzout);
1495         if (h_ret != H_SUCCESS) {
1496                 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1497                          "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
1498                          "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1499                          h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
1500                          shca->ipz_hca_handle.handle,
1501                          e_origmr->ipz_mr_handle.handle,
1502                          e_origmr->ib.ib_mr.lkey);
1503                 ret = ehca2ib_return_code(h_ret);
1504                 goto ehca_reg_smr_exit0;
1505         }
1506         /* successful registration */
1507         e_newmr->num_kpages = e_origmr->num_kpages;
1508         e_newmr->num_hwpages = e_origmr->num_hwpages;
1509         e_newmr->hwpage_size   = e_origmr->hwpage_size;
1510         e_newmr->start = iova_start;
1511         e_newmr->size = e_origmr->size;
1512         e_newmr->acl = acl;
1513         e_newmr->ipz_mr_handle = hipzout.handle;
1514         *lkey = hipzout.lkey;
1515         *rkey = hipzout.rkey;
1516         return 0;
1517
1518 ehca_reg_smr_exit0:
1519         if (ret)
1520                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
1521                          "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
1522                          ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
1523         return ret;
1524 } /* end ehca_reg_smr() */
1525
1526 /*----------------------------------------------------------------------*/
1527 static inline void *ehca_calc_sectbase(int top, int dir, int idx)
1528 {
1529         unsigned long ret = idx;
1530         ret |= dir << EHCA_DIR_INDEX_SHIFT;
1531         ret |= top << EHCA_TOP_INDEX_SHIFT;
1532         return abs_to_virt(ret << SECTION_SIZE_BITS);
1533 }
1534
/*
 * True when a busmap slot holds something other than EHCA_INVAL_ADDR.
 * The argument is parenthesized so the cast applies to the whole expression
 * regardless of what the caller passes in.
 */
#define ehca_bmap_valid(entry) \
        ((u64)(entry) != (u64)EHCA_INVAL_ADDR)
1537
1538 static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
1539                                struct ehca_shca *shca, struct ehca_mr *mr,
1540                                struct ehca_mr_pginfo *pginfo)
1541 {
1542         u64 h_ret = 0;
1543         unsigned long page = 0;
1544         u64 rpage = virt_to_abs(kpage);
1545         int page_count;
1546
1547         void *sectbase = ehca_calc_sectbase(top, dir, idx);
1548         if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
1549                 ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
1550                                            "hwpage_size does not fit to "
1551                                            "section start address");
1552         }
1553         page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
1554
1555         while (page < page_count) {
1556                 u64 rnum;
1557                 for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
1558                      rnum++) {
1559                         void *pg = sectbase + ((page++) * pginfo->hwpage_size);
1560                         kpage[rnum] = virt_to_abs(pg);
1561                 }
1562
1563                 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
1564                         ehca_encode_hwpage_size(pginfo->hwpage_size),
1565                         0, rpage, rnum);
1566
1567                 if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
1568                         ehca_err(&shca->ib_device, "register_rpage_mr failed");
1569                         return h_ret;
1570                 }
1571         }
1572         return h_ret;
1573 }
1574
1575 static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
1576                                 struct ehca_shca *shca, struct ehca_mr *mr,
1577                                 struct ehca_mr_pginfo *pginfo)
1578 {
1579         u64 hret = H_SUCCESS;
1580         int idx;
1581
1582         for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
1583                 if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
1584                         continue;
1585
1586                 hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
1587                                            pginfo);
1588                 if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1589                                 return hret;
1590         }
1591         return hret;
1592 }
1593
1594 static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
1595                                     struct ehca_mr *mr,
1596                                     struct ehca_mr_pginfo *pginfo)
1597 {
1598         u64 hret = H_SUCCESS;
1599         int dir;
1600
1601         for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
1602                 if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
1603                         continue;
1604
1605                 hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
1606                 if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1607                                 return hret;
1608         }
1609         return hret;
1610 }
1611
1612 /* register internal max-MR to internal SHCA */
1613 int ehca_reg_internal_maxmr(
1614         struct ehca_shca *shca,
1615         struct ehca_pd *e_pd,
1616         struct ehca_mr **e_maxmr)  /*OUT*/
1617 {
1618         int ret;
1619         struct ehca_mr *e_mr;
1620         u64 *iova_start;
1621         u64 size_maxmr;
1622         struct ehca_mr_pginfo pginfo;
1623         struct ib_phys_buf ib_pbuf;
1624         u32 num_kpages;
1625         u32 num_hwpages;
1626         u64 hw_pgsize;
1627
1628         if (!ehca_bmap) {
1629                 ret = -EFAULT;
1630                 goto ehca_reg_internal_maxmr_exit0;
1631         }
1632
1633         e_mr = ehca_mr_new();
1634         if (!e_mr) {
1635                 ehca_err(&shca->ib_device, "out of memory");
1636                 ret = -ENOMEM;
1637                 goto ehca_reg_internal_maxmr_exit0;
1638         }
1639         e_mr->flags |= EHCA_MR_FLAG_MAXMR;
1640
1641         /* register internal max-MR on HCA */
1642         size_maxmr = ehca_mr_len;
1643         iova_start = (u64 *)ehca_map_vaddr((void *)KERNELBASE);
1644         ib_pbuf.addr = 0;
1645         ib_pbuf.size = size_maxmr;
1646         num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
1647                                 PAGE_SIZE);
1648         hw_pgsize = ehca_get_max_hwpage_size(shca);
1649         num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
1650                                  hw_pgsize);
1651
1652         memset(&pginfo, 0, sizeof(pginfo));
1653         pginfo.type = EHCA_MR_PGI_PHYS;
1654         pginfo.num_kpages = num_kpages;
1655         pginfo.num_hwpages = num_hwpages;
1656         pginfo.hwpage_size = hw_pgsize;
1657         pginfo.u.phy.num_phys_buf = 1;
1658         pginfo.u.phy.phys_buf_array = &ib_pbuf;
1659
1660         ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1661                           &pginfo, &e_mr->ib.ib_mr.lkey,
1662                           &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
1663         if (ret) {
1664                 ehca_err(&shca->ib_device, "reg of internal max MR failed, "
1665                          "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
1666                          "num_hwpages=%x", e_mr, iova_start, size_maxmr,
1667                          num_kpages, num_hwpages);
1668                 goto ehca_reg_internal_maxmr_exit1;
1669         }
1670
1671         /* successful registration of all pages */
1672         e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
1673         e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
1674         e_mr->ib.ib_mr.uobject = NULL;
1675         atomic_inc(&(e_pd->ib_pd.usecnt));
1676         atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
1677         *e_maxmr = e_mr;
1678         return 0;
1679
1680 ehca_reg_internal_maxmr_exit1:
1681         ehca_mr_delete(e_mr);
1682 ehca_reg_internal_maxmr_exit0:
1683         if (ret)
1684                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
1685                          ret, shca, e_pd, e_maxmr);
1686         return ret;
1687 } /* end ehca_reg_internal_maxmr() */
1688
1689 /*----------------------------------------------------------------------*/
1690
1691 int ehca_reg_maxmr(struct ehca_shca *shca,
1692                    struct ehca_mr *e_newmr,
1693                    u64 *iova_start,
1694                    int acl,
1695                    struct ehca_pd *e_pd,
1696                    u32 *lkey,
1697                    u32 *rkey)
1698 {
1699         u64 h_ret;
1700         struct ehca_mr *e_origmr = shca->maxmr;
1701         u32 hipz_acl;
1702         struct ehca_mr_hipzout_parms hipzout;
1703
1704         ehca_mrmw_map_acl(acl, &hipz_acl);
1705         ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1706
1707         h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1708                                     (u64)iova_start, hipz_acl, e_pd->fw_pd,
1709                                     &hipzout);
1710         if (h_ret != H_SUCCESS) {
1711                 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1712                          "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1713                          h_ret, e_origmr, shca->ipz_hca_handle.handle,
1714                          e_origmr->ipz_mr_handle.handle,
1715                          e_origmr->ib.ib_mr.lkey);
1716                 return ehca2ib_return_code(h_ret);
1717         }
1718         /* successful registration */
1719         e_newmr->num_kpages = e_origmr->num_kpages;
1720         e_newmr->num_hwpages = e_origmr->num_hwpages;
1721         e_newmr->hwpage_size = e_origmr->hwpage_size;
1722         e_newmr->start = iova_start;
1723         e_newmr->size = e_origmr->size;
1724         e_newmr->acl = acl;
1725         e_newmr->ipz_mr_handle = hipzout.handle;
1726         *lkey = hipzout.lkey;
1727         *rkey = hipzout.rkey;
1728         return 0;
1729 } /* end ehca_reg_maxmr() */
1730
1731 /*----------------------------------------------------------------------*/
1732
1733 int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1734 {
1735         int ret;
1736         struct ehca_mr *e_maxmr;
1737         struct ib_pd *ib_pd;
1738
1739         if (!shca->maxmr) {
1740                 ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
1741                 ret = -EINVAL;
1742                 goto ehca_dereg_internal_maxmr_exit0;
1743         }
1744
1745         e_maxmr = shca->maxmr;
1746         ib_pd = e_maxmr->ib.ib_mr.pd;
1747         shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
1748
1749         ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
1750         if (ret) {
1751                 ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
1752                          "ret=%i e_maxmr=%p shca=%p lkey=%x",
1753                          ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
1754                 shca->maxmr = e_maxmr;
1755                 goto ehca_dereg_internal_maxmr_exit0;
1756         }
1757
1758         atomic_dec(&ib_pd->usecnt);
1759
1760 ehca_dereg_internal_maxmr_exit0:
1761         if (ret)
1762                 ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
1763                          ret, shca, shca->maxmr);
1764         return ret;
1765 } /* end ehca_dereg_internal_maxmr() */
1766
1767 /*----------------------------------------------------------------------*/
1768
1769 /*
1770  * check physical buffer array of MR verbs for validness and
1771  * calculates MR size
1772  */
1773 int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
1774                                   int num_phys_buf,
1775                                   u64 *iova_start,
1776                                   u64 *size)
1777 {
1778         struct ib_phys_buf *pbuf = phys_buf_array;
1779         u64 size_count = 0;
1780         u32 i;
1781
1782         if (num_phys_buf == 0) {
1783                 ehca_gen_err("bad phys buf array len, num_phys_buf=0");
1784                 return -EINVAL;
1785         }
1786         /* check first buffer */
1787         if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
1788                 ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
1789                              "pbuf->addr=%llx pbuf->size=%llx",
1790                              iova_start, pbuf->addr, pbuf->size);
1791                 return -EINVAL;
1792         }
1793         if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
1794             (num_phys_buf > 1)) {
1795                 ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
1796                              "pbuf->size=%llx", pbuf->addr, pbuf->size);
1797                 return -EINVAL;
1798         }
1799
1800         for (i = 0; i < num_phys_buf; i++) {
1801                 if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
1802                         ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
1803                                      "pbuf->size=%llx",
1804                                      i, pbuf->addr, pbuf->size);
1805                         return -EINVAL;
1806                 }
1807                 if (((i > 0) && /* not 1st */
1808                      (i < (num_phys_buf - 1)) &&        /* not last */
1809                      (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
1810                         ehca_gen_err("bad size, i=%x pbuf->size=%llx",
1811                                      i, pbuf->size);
1812                         return -EINVAL;
1813                 }
1814                 size_count += pbuf->size;
1815                 pbuf++;
1816         }
1817
1818         *size = size_count;
1819         return 0;
1820 } /* end ehca_mr_chk_buf_and_calc_size() */
1821
1822 /*----------------------------------------------------------------------*/
1823
1824 /* check page list of map FMR verb for validness */
1825 int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1826                              u64 *page_list,
1827                              int list_len)
1828 {
1829         u32 i;
1830         u64 *page;
1831
1832         if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
1833                 ehca_gen_err("bad list_len, list_len=%x "
1834                              "e_fmr->fmr_max_pages=%x fmr=%p",
1835                              list_len, e_fmr->fmr_max_pages, e_fmr);
1836                 return -EINVAL;
1837         }
1838
1839         /* each page must be aligned */
1840         page = page_list;
1841         for (i = 0; i < list_len; i++) {
1842                 if (*page % e_fmr->fmr_page_size) {
1843                         ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
1844                                      "fmr_page_size=%x", i, *page, page, e_fmr,
1845                                      e_fmr->fmr_page_size);
1846                         return -EINVAL;
1847                 }
1848                 page++;
1849         }
1850
1851         return 0;
1852 } /* end ehca_fmr_check_page_list() */
1853
1854 /*----------------------------------------------------------------------*/
1855
1856 /* PAGE_SIZE >= pginfo->hwpage_size */
1857 static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1858                                   u32 number,
1859                                   u64 *kpage)
1860 {
1861         int ret = 0;
1862         struct ib_umem_chunk *prev_chunk;
1863         struct ib_umem_chunk *chunk;
1864         u64 pgaddr;
1865         u32 i = 0;
1866         u32 j = 0;
1867         int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
1868
1869         /* loop over desired chunk entries */
1870         chunk      = pginfo->u.usr.next_chunk;
1871         prev_chunk = pginfo->u.usr.next_chunk;
1872         list_for_each_entry_continue(
1873                 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1874                 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1875                         pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
1876                                 << PAGE_SHIFT ;
1877                         *kpage = phys_to_abs(pgaddr +
1878                                              (pginfo->next_hwpage *
1879                                               pginfo->hwpage_size));
1880                         if ( !(*kpage) ) {
1881                                 ehca_gen_err("pgaddr=%llx "
1882                                              "chunk->page_list[i]=%llx "
1883                                              "i=%x next_hwpage=%llx",
1884                                              pgaddr, (u64)sg_dma_address(
1885                                                      &chunk->page_list[i]),
1886                                              i, pginfo->next_hwpage);
1887                                 return -EFAULT;
1888                         }
1889                         (pginfo->hwpage_cnt)++;
1890                         (pginfo->next_hwpage)++;
1891                         kpage++;
1892                         if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
1893                                 (pginfo->kpage_cnt)++;
1894                                 (pginfo->u.usr.next_nmap)++;
1895                                 pginfo->next_hwpage = 0;
1896                                 i++;
1897                         }
1898                         j++;
1899                         if (j >= number) break;
1900                 }
1901                 if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
1902                     (j >= number)) {
1903                         pginfo->u.usr.next_nmap = 0;
1904                         prev_chunk = chunk;
1905                         break;
1906                 } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
1907                         pginfo->u.usr.next_nmap = 0;
1908                         prev_chunk = chunk;
1909                 } else if (j >= number)
1910                         break;
1911                 else
1912                         prev_chunk = chunk;
1913         }
1914         pginfo->u.usr.next_chunk =
1915                 list_prepare_entry(prev_chunk,
1916                                    (&(pginfo->u.usr.region->chunk_list)),
1917                                    list);
1918         return ret;
1919 }
1920
1921 /*
1922  * check given pages for contiguous layout
1923  * last page addr is returned in prev_pgaddr for further check
1924  */
1925 static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
1926                                      int start_idx, int end_idx,
1927                                      u64 *prev_pgaddr)
1928 {
1929         int t;
1930         for (t = start_idx; t <= end_idx; t++) {
1931                 u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
1932                 if (ehca_debug_level >= 3)
1933                         ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
1934                                      *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
1935                 if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
1936                         ehca_gen_err("uncontiguous page found pgaddr=%llx "
1937                                      "prev_pgaddr=%llx page_list_i=%x",
1938                                      pgaddr, *prev_pgaddr, t);
1939                         return -EINVAL;
1940                 }
1941                 *prev_pgaddr = pgaddr;
1942         }
1943         return 0;
1944 }
1945
1946 /* PAGE_SIZE < pginfo->hwpage_size */
1947 static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1948                                   u32 number,
1949                                   u64 *kpage)
1950 {
1951         int ret = 0;
1952         struct ib_umem_chunk *prev_chunk;
1953         struct ib_umem_chunk *chunk;
1954         u64 pgaddr, prev_pgaddr;
1955         u32 i = 0;
1956         u32 j = 0;
1957         int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
1958         int nr_kpages = kpages_per_hwpage;
1959
1960         /* loop over desired chunk entries */
1961         chunk      = pginfo->u.usr.next_chunk;
1962         prev_chunk = pginfo->u.usr.next_chunk;
1963         list_for_each_entry_continue(
1964                 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1965                 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1966                         if (nr_kpages == kpages_per_hwpage) {
1967                                 pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
1968                                            << PAGE_SHIFT );
1969                                 *kpage = phys_to_abs(pgaddr);
1970                                 if ( !(*kpage) ) {
1971                                         ehca_gen_err("pgaddr=%llx i=%x",
1972                                                      pgaddr, i);
1973                                         ret = -EFAULT;
1974                                         return ret;
1975                                 }
1976                                 /*
1977                                  * The first page in a hwpage must be aligned;
1978                                  * the first MR page is exempt from this rule.
1979                                  */
1980                                 if (pgaddr & (pginfo->hwpage_size - 1)) {
1981                                         if (pginfo->hwpage_cnt) {
1982                                                 ehca_gen_err(
1983                                                         "invalid alignment "
1984                                                         "pgaddr=%llx i=%x "
1985                                                         "mr_pgsize=%llx",
1986                                                         pgaddr, i,
1987                                                         pginfo->hwpage_size);
1988                                                 ret = -EFAULT;
1989                                                 return ret;
1990                                         }
1991                                         /* first MR page */
1992                                         pginfo->kpage_cnt =
1993                                                 (pgaddr &
1994                                                  (pginfo->hwpage_size - 1)) >>
1995                                                 PAGE_SHIFT;
1996                                         nr_kpages -= pginfo->kpage_cnt;
1997                                         *kpage = phys_to_abs(
1998                                                 pgaddr &
1999                                                 ~(pginfo->hwpage_size - 1));
2000                                 }
2001                                 if (ehca_debug_level >= 3) {
2002                                         u64 val = *(u64 *)abs_to_virt(
2003                                                 phys_to_abs(pgaddr));
2004                                         ehca_gen_dbg("kpage=%llx chunk_page=%llx "
2005                                                      "value=%016llx",
2006                                                      *kpage, pgaddr, val);
2007                                 }
2008                                 prev_pgaddr = pgaddr;
2009                                 i++;
2010                                 pginfo->kpage_cnt++;
2011                                 pginfo->u.usr.next_nmap++;
2012                                 nr_kpages--;
2013                                 if (!nr_kpages)
2014                                         goto next_kpage;
2015                                 continue;
2016                         }
2017                         if (i + nr_kpages > chunk->nmap) {
2018                                 ret = ehca_check_kpages_per_ate(
2019                                         chunk->page_list, i,
2020                                         chunk->nmap - 1, &prev_pgaddr);
2021                                 if (ret) return ret;
2022                                 pginfo->kpage_cnt += chunk->nmap - i;
2023                                 pginfo->u.usr.next_nmap += chunk->nmap - i;
2024                                 nr_kpages -= chunk->nmap - i;
2025                                 break;
2026                         }
2027
2028                         ret = ehca_check_kpages_per_ate(chunk->page_list, i,
2029                                                         i + nr_kpages - 1,
2030                                                         &prev_pgaddr);
2031                         if (ret) return ret;
2032                         i += nr_kpages;
2033                         pginfo->kpage_cnt += nr_kpages;
2034                         pginfo->u.usr.next_nmap += nr_kpages;
2035 next_kpage:
2036                         nr_kpages = kpages_per_hwpage;
2037                         (pginfo->hwpage_cnt)++;
2038                         kpage++;
2039                         j++;
2040                         if (j >= number) break;
2041                 }
2042                 if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
2043                     (j >= number)) {
2044                         pginfo->u.usr.next_nmap = 0;
2045                         prev_chunk = chunk;
2046                         break;
2047                 } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
2048                         pginfo->u.usr.next_nmap = 0;
2049                         prev_chunk = chunk;
2050                 } else if (j >= number)
2051                         break;
2052                 else
2053                         prev_chunk = chunk;
2054         }
2055         pginfo->u.usr.next_chunk =
2056                 list_prepare_entry(prev_chunk,
2057                                    (&(pginfo->u.usr.region->chunk_list)),
2058                                    list);
2059         return ret;
2060 }
2061
2062 static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
2063                                  u32 number, u64 *kpage)
2064 {
2065         int ret = 0;
2066         struct ib_phys_buf *pbuf;
2067         u64 num_hw, offs_hw;
2068         u32 i = 0;
2069
2070         /* loop over desired phys_buf_array entries */
2071         while (i < number) {
2072                 pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
2073                 num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
2074                                      pbuf->size, pginfo->hwpage_size);
2075                 offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
2076                         pginfo->hwpage_size;
2077                 while (pginfo->next_hwpage < offs_hw + num_hw) {
2078                         /* sanity check */
2079                         if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
2080                             (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
2081                                 ehca_gen_err("kpage_cnt >= num_kpages, "
2082                                              "kpage_cnt=%llx num_kpages=%llx "
2083                                              "hwpage_cnt=%llx "
2084                                              "num_hwpages=%llx i=%x",
2085                                              pginfo->kpage_cnt,
2086                                              pginfo->num_kpages,
2087                                              pginfo->hwpage_cnt,
2088                                              pginfo->num_hwpages, i);
2089                                 return -EFAULT;
2090                         }
2091                         *kpage = phys_to_abs(
2092                                 (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
2093                                 (pginfo->next_hwpage * pginfo->hwpage_size));
2094                         if ( !(*kpage) && pbuf->addr ) {
2095                                 ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
2096                                              "next_hwpage=%llx", pbuf->addr,
2097                                              pbuf->size, pginfo->next_hwpage);
2098                                 return -EFAULT;
2099                         }
2100                         (pginfo->hwpage_cnt)++;
2101                         (pginfo->next_hwpage)++;
2102                         if (PAGE_SIZE >= pginfo->hwpage_size) {
2103                                 if (pginfo->next_hwpage %
2104                                     (PAGE_SIZE / pginfo->hwpage_size) == 0)
2105                                         (pginfo->kpage_cnt)++;
2106                         } else
2107                                 pginfo->kpage_cnt += pginfo->hwpage_size /
2108                                         PAGE_SIZE;
2109                         kpage++;
2110                         i++;
2111                         if (i >= number) break;
2112                 }
2113                 if (pginfo->next_hwpage >= offs_hw + num_hw) {
2114                         (pginfo->u.phy.next_buf)++;
2115                         pginfo->next_hwpage = 0;
2116                 }
2117         }
2118         return ret;
2119 }
2120
2121 static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
2122                                 u32 number, u64 *kpage)
2123 {
2124         int ret = 0;
2125         u64 *fmrlist;
2126         u32 i;
2127
2128         /* loop over desired page_list entries */
2129         fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
2130         for (i = 0; i < number; i++) {
2131                 *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
2132                                      pginfo->next_hwpage * pginfo->hwpage_size);
2133                 if ( !(*kpage) ) {
2134                         ehca_gen_err("*fmrlist=%llx fmrlist=%p "
2135                                      "next_listelem=%llx next_hwpage=%llx",
2136                                      *fmrlist, fmrlist,
2137                                      pginfo->u.fmr.next_listelem,
2138                                      pginfo->next_hwpage);
2139                         return -EFAULT;
2140                 }
2141                 (pginfo->hwpage_cnt)++;
2142                 if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
2143                         if (pginfo->next_hwpage %
2144                             (pginfo->u.fmr.fmr_pgsize /
2145                              pginfo->hwpage_size) == 0) {
2146                                 (pginfo->kpage_cnt)++;
2147                                 (pginfo->u.fmr.next_listelem)++;
2148                                 fmrlist++;
2149                                 pginfo->next_hwpage = 0;
2150                         } else
2151                                 (pginfo->next_hwpage)++;
2152                 } else {
2153                         unsigned int cnt_per_hwpage = pginfo->hwpage_size /
2154                                 pginfo->u.fmr.fmr_pgsize;
2155                         unsigned int j;
2156                         u64 prev = *kpage;
2157                         /* check if adrs are contiguous */
2158                         for (j = 1; j < cnt_per_hwpage; j++) {
2159                                 u64 p = phys_to_abs(fmrlist[j] &
2160                                                     ~(pginfo->hwpage_size - 1));
2161                                 if (prev + pginfo->u.fmr.fmr_pgsize != p) {
2162                                         ehca_gen_err("uncontiguous fmr pages "
2163                                                      "found prev=%llx p=%llx "
2164                                                      "idx=%x", prev, p, i + j);
2165                                         return -EINVAL;
2166                                 }
2167                                 prev = p;
2168                         }
2169                         pginfo->kpage_cnt += cnt_per_hwpage;
2170                         pginfo->u.fmr.next_listelem += cnt_per_hwpage;
2171                         fmrlist += cnt_per_hwpage;
2172                 }
2173                 kpage++;
2174         }
2175         return ret;
2176 }
2177
2178 /* setup page buffer from page info */
2179 int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
2180                      u32 number,
2181                      u64 *kpage)
2182 {
2183         int ret;
2184
2185         switch (pginfo->type) {
2186         case EHCA_MR_PGI_PHYS:
2187                 ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
2188                 break;
2189         case EHCA_MR_PGI_USER:
2190                 ret = PAGE_SIZE >= pginfo->hwpage_size ?
2191                         ehca_set_pagebuf_user1(pginfo, number, kpage) :
2192                         ehca_set_pagebuf_user2(pginfo, number, kpage);
2193                 break;
2194         case EHCA_MR_PGI_FMR:
2195                 ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
2196                 break;
2197         default:
2198                 ehca_gen_err("bad pginfo->type=%x", pginfo->type);
2199                 ret = -EFAULT;
2200                 break;
2201         }
2202         return ret;
2203 } /* end ehca_set_pagebuf() */
2204
2205 /*----------------------------------------------------------------------*/
2206
2207 /*
2208  * check MR if it is a max-MR, i.e. uses whole memory
2209  * in case it's a max-MR 1 is returned, else 0
2210  */
2211 int ehca_mr_is_maxmr(u64 size,
2212                      u64 *iova_start)
2213 {
2214         /* a MR is treated as max-MR only if it fits following: */
2215         if ((size == ehca_mr_len) &&
2216             (iova_start == (void *)ehca_map_vaddr((void *)KERNELBASE))) {
2217                 ehca_gen_dbg("this is a max-MR");
2218                 return 1;
2219         } else
2220                 return 0;
2221 } /* end ehca_mr_is_maxmr() */
2222
2223 /*----------------------------------------------------------------------*/
2224
2225 /* map access control for MR/MW. This routine is used for MR and MW. */
2226 void ehca_mrmw_map_acl(int ib_acl,
2227                        u32 *hipz_acl)
2228 {
2229         *hipz_acl = 0;
2230         if (ib_acl & IB_ACCESS_REMOTE_READ)
2231                 *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
2232         if (ib_acl & IB_ACCESS_REMOTE_WRITE)
2233                 *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
2234         if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
2235                 *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
2236         if (ib_acl & IB_ACCESS_LOCAL_WRITE)
2237                 *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
2238         if (ib_acl & IB_ACCESS_MW_BIND)
2239                 *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
2240 } /* end ehca_mrmw_map_acl() */
2241
2242 /*----------------------------------------------------------------------*/
2243
2244 /* sets page size in hipz access control for MR/MW. */
2245 void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
2246 {
2247         *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
2248 } /* end ehca_mrmw_set_pgsize_hipz_acl() */
2249
2250 /*----------------------------------------------------------------------*/
2251
2252 /*
2253  * reverse map access control for MR/MW.
2254  * This routine is used for MR and MW.
2255  */
2256 void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
2257                                int *ib_acl) /*OUT*/
2258 {
2259         *ib_acl = 0;
2260         if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
2261                 *ib_acl |= IB_ACCESS_REMOTE_READ;
2262         if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
2263                 *ib_acl |= IB_ACCESS_REMOTE_WRITE;
2264         if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
2265                 *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
2266         if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
2267                 *ib_acl |= IB_ACCESS_LOCAL_WRITE;
2268         if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
2269                 *ib_acl |= IB_ACCESS_MW_BIND;
2270 } /* end ehca_mrmw_reverse_map_acl() */
2271
2272
2273 /*----------------------------------------------------------------------*/
2274
2275 /*
2276  * MR destructor and constructor
2277  * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
2278  * except struct ib_mr and spinlock
2279  */
2280 void ehca_mr_deletenew(struct ehca_mr *mr)
2281 {
2282         mr->flags = 0;
2283         mr->num_kpages = 0;
2284         mr->num_hwpages = 0;
2285         mr->acl = 0;
2286         mr->start = NULL;
2287         mr->fmr_page_size = 0;
2288         mr->fmr_max_pages = 0;
2289         mr->fmr_max_maps = 0;
2290         mr->fmr_map_cnt = 0;
2291         memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
2292         memset(&mr->galpas, 0, sizeof(mr->galpas));
2293 } /* end ehca_mr_deletenew() */
2294
2295 int ehca_init_mrmw_cache(void)
2296 {
2297         mr_cache = kmem_cache_create("ehca_cache_mr",
2298                                      sizeof(struct ehca_mr), 0,
2299                                      SLAB_HWCACHE_ALIGN,
2300                                      NULL);
2301         if (!mr_cache)
2302                 return -ENOMEM;
2303         mw_cache = kmem_cache_create("ehca_cache_mw",
2304                                      sizeof(struct ehca_mw), 0,
2305                                      SLAB_HWCACHE_ALIGN,
2306                                      NULL);
2307         if (!mw_cache) {
2308                 kmem_cache_destroy(mr_cache);
2309                 mr_cache = NULL;
2310                 return -ENOMEM;
2311         }
2312         return 0;
2313 }
2314
2315 void ehca_cleanup_mrmw_cache(void)
2316 {
2317         if (mr_cache)
2318                 kmem_cache_destroy(mr_cache);
2319         if (mw_cache)
2320                 kmem_cache_destroy(mw_cache);
2321 }
2322
2323 static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
2324                                      int dir)
2325 {
2326         if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
2327                 ehca_top_bmap->dir[dir] =
2328                         kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
2329                 if (!ehca_top_bmap->dir[dir])
2330                         return -ENOMEM;
2331                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2332                 memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
2333         }
2334         return 0;
2335 }
2336
2337 static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
2338 {
2339         if (!ehca_bmap_valid(ehca_bmap->top[top])) {
2340                 ehca_bmap->top[top] =
2341                         kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
2342                 if (!ehca_bmap->top[top])
2343                         return -ENOMEM;
2344                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2345                 memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
2346         }
2347         return ehca_init_top_bmap(ehca_bmap->top[top], dir);
2348 }
2349
2350 static inline int ehca_calc_index(unsigned long i, unsigned long s)
2351 {
2352         return (i >> s) & EHCA_INDEX_MASK;
2353 }
2354
2355 void ehca_destroy_busmap(void)
2356 {
2357         int top, dir;
2358
2359         if (!ehca_bmap)
2360                 return;
2361
2362         for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2363                 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2364                         continue;
2365                 for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
2366                         if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2367                                 continue;
2368
2369                         kfree(ehca_bmap->top[top]->dir[dir]);
2370                 }
2371
2372                 kfree(ehca_bmap->top[top]);
2373         }
2374
2375         kfree(ehca_bmap);
2376         ehca_bmap = NULL;
2377 }
2378
2379 static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
2380 {
2381         unsigned long i, start_section, end_section;
2382         int top, dir, idx;
2383
2384         if (!nr_pages)
2385                 return 0;
2386
2387         if (!ehca_bmap) {
2388                 ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
2389                 if (!ehca_bmap)
2390                         return -ENOMEM;
2391                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2392                 memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
2393         }
2394
2395         start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE;
2396         end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
2397         for (i = start_section; i < end_section; i++) {
2398                 int ret;
2399                 top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
2400                 dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
2401                 idx = i & EHCA_INDEX_MASK;
2402
2403                 ret = ehca_init_bmap(ehca_bmap, top, dir);
2404                 if (ret) {
2405                         ehca_destroy_busmap();
2406                         return ret;
2407                 }
2408                 ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
2409                 ehca_mr_len += EHCA_SECTSIZE;
2410         }
2411         return 0;
2412 }
2413
2414 static int ehca_is_hugepage(unsigned long pfn)
2415 {
2416         int page_order;
2417
2418         if (pfn & EHCA_HUGEPAGE_PFN_MASK)
2419                 return 0;
2420
2421         page_order = compound_order(pfn_to_page(pfn));
2422         if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
2423                 return 0;
2424
2425         return 1;
2426 }
2427
2428 static int ehca_create_busmap_callback(unsigned long initial_pfn,
2429                                        unsigned long total_nr_pages, void *arg)
2430 {
2431         int ret;
2432         unsigned long pfn, start_pfn, end_pfn, nr_pages;
2433
2434         if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
2435                 return ehca_update_busmap(initial_pfn, total_nr_pages);
2436
2437         /* Given chunk is >= 16GB -> check for hugepages */
2438         start_pfn = initial_pfn;
2439         end_pfn = initial_pfn + total_nr_pages;
2440         pfn = start_pfn;
2441
2442         while (pfn < end_pfn) {
2443                 if (ehca_is_hugepage(pfn)) {
2444                         /* Add mem found in front of the hugepage */
2445                         nr_pages = pfn - start_pfn;
2446                         ret = ehca_update_busmap(start_pfn, nr_pages);
2447                         if (ret)
2448                                 return ret;
2449                         /* Skip the hugepage */
2450                         pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
2451                         start_pfn = pfn;
2452                 } else
2453                         pfn += (EHCA_SECTSIZE / PAGE_SIZE);
2454         }
2455
2456         /* Add mem found behind the hugepage(s)  */
2457         nr_pages = pfn - start_pfn;
2458         return ehca_update_busmap(start_pfn, nr_pages);
2459 }
2460
2461 int ehca_create_busmap(void)
2462 {
2463         int ret;
2464
2465         ehca_mr_len = 0;
2466         ret = walk_memory_resource(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
2467                                    ehca_create_busmap_callback);
2468         return ret;
2469 }
2470
2471 static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
2472                                    struct ehca_mr *e_mr,
2473                                    struct ehca_mr_pginfo *pginfo)
2474 {
2475         int top;
2476         u64 hret, *kpage;
2477
2478         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
2479         if (!kpage) {
2480                 ehca_err(&shca->ib_device, "kpage alloc failed");
2481                 return -ENOMEM;
2482         }
2483         for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2484                 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2485                         continue;
2486                 hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
2487                 if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
2488                         break;
2489         }
2490
2491         ehca_free_fw_ctrlblock(kpage);
2492
2493         if (hret == H_SUCCESS)
2494                 return 0; /* Everything is fine */
2495         else {
2496                 ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
2497                                  "h_ret=%lli e_mr=%p top=%x lkey=%x "
2498                                  "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
2499                                  e_mr->ib.ib_mr.lkey,
2500                                  shca->ipz_hca_handle.handle,
2501                                  e_mr->ipz_mr_handle.handle);
2502                 return ehca2ib_return_code(hret);
2503         }
2504 }
2505
2506 static u64 ehca_map_vaddr(void *caddr)
2507 {
2508         int top, dir, idx;
2509         unsigned long abs_addr, offset;
2510         u64 entry;
2511
2512         if (!ehca_bmap)
2513                 return EHCA_INVAL_ADDR;
2514
2515         abs_addr = virt_to_abs(caddr);
2516         top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
2517         if (!ehca_bmap_valid(ehca_bmap->top[top]))
2518                 return EHCA_INVAL_ADDR;
2519
2520         dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
2521         if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2522                 return EHCA_INVAL_ADDR;
2523
2524         idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
2525
2526         entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
2527         if (ehca_bmap_valid(entry)) {
2528                 offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
2529                 return entry | offset;
2530         } else
2531                 return EHCA_INVAL_ADDR;
2532 }
2533
2534 static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
2535 {
2536         return dma_addr == EHCA_INVAL_ADDR;
2537 }
2538
2539 static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
2540                                size_t size, enum dma_data_direction direction)
2541 {
2542         if (cpu_addr)
2543                 return ehca_map_vaddr(cpu_addr);
2544         else
2545                 return EHCA_INVAL_ADDR;
2546 }
2547
2548 static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
2549                                   enum dma_data_direction direction)
2550 {
2551         /* This is only a stub; nothing to be done here */
2552 }
2553
2554 static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
2555                              unsigned long offset, size_t size,
2556                              enum dma_data_direction direction)
2557 {
2558         u64 addr;
2559
2560         if (offset + size > PAGE_SIZE)
2561                 return EHCA_INVAL_ADDR;
2562
2563         addr = ehca_map_vaddr(page_address(page));
2564         if (!ehca_dma_mapping_error(dev, addr))
2565                 addr += offset;
2566
2567         return addr;
2568 }
2569
2570 static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
2571                                 enum dma_data_direction direction)
2572 {
2573         /* This is only a stub; nothing to be done here */
2574 }
2575
2576 static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
2577                            int nents, enum dma_data_direction direction)
2578 {
2579         struct scatterlist *sg;
2580         int i;
2581
2582         for_each_sg(sgl, sg, nents, i) {
2583                 u64 addr;
2584                 addr = ehca_map_vaddr(sg_virt(sg));
2585                 if (ehca_dma_mapping_error(dev, addr))
2586                         return 0;
2587
2588                 sg->dma_address = addr;
2589                 sg->dma_length = sg->length;
2590         }
2591         return nents;
2592 }
2593
2594 static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
2595                               int nents, enum dma_data_direction direction)
2596 {
2597         /* This is only a stub; nothing to be done here */
2598 }
2599
2600 static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
2601 {
2602         return sg->dma_address;
2603 }
2604
2605 static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
2606 {
2607         return sg->length;
2608 }
2609
2610 static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
2611                                          size_t size,
2612                                          enum dma_data_direction dir)
2613 {
2614         dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
2615 }
2616
2617 static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
2618                                             size_t size,
2619                                             enum dma_data_direction dir)
2620 {
2621         dma_sync_single_for_device(dev->dma_device, addr, size, dir);
2622 }
2623
2624 static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
2625                                      u64 *dma_handle, gfp_t flag)
2626 {
2627         struct page *p;
2628         void *addr = NULL;
2629         u64 dma_addr;
2630
2631         p = alloc_pages(flag, get_order(size));
2632         if (p) {
2633                 addr = page_address(p);
2634                 dma_addr = ehca_map_vaddr(addr);
2635                 if (ehca_dma_mapping_error(dev, dma_addr)) {
2636                         free_pages((unsigned long)addr, get_order(size));
2637                         return NULL;
2638                 }
2639                 if (dma_handle)
2640                         *dma_handle = dma_addr;
2641                 return addr;
2642         }
2643         return NULL;
2644 }
2645
2646 static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
2647                                    void *cpu_addr, u64 dma_handle)
2648 {
2649         if (cpu_addr && size)
2650                 free_pages((unsigned long)cpu_addr, get_order(size));
2651 }
2652
2653
2654 struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
2655         .mapping_error          = ehca_dma_mapping_error,
2656         .map_single             = ehca_dma_map_single,
2657         .unmap_single           = ehca_dma_unmap_single,
2658         .map_page               = ehca_dma_map_page,
2659         .unmap_page             = ehca_dma_unmap_page,
2660         .map_sg                 = ehca_dma_map_sg,
2661         .unmap_sg               = ehca_dma_unmap_sg,
2662         .dma_address            = ehca_dma_address,
2663         .dma_len                = ehca_dma_len,
2664         .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
2665         .sync_single_for_device = ehca_dma_sync_single_for_device,
2666         .alloc_coherent         = ehca_dma_alloc_coherent,
2667         .free_coherent          = ehca_dma_free_coherent,
2668 };