Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux-2.6.git] / drivers / misc / sgi-xp / xpc_uv.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9 /*
10  * Cross Partition Communication (XPC) uv-based functions.
11  *
12  *     Architecture specific implementation of common functions.
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/interrupt.h>
19 #include <linux/delay.h>
20 #include <linux/device.h>
21 #include <linux/err.h>
22 #include <asm/uv/uv_hub.h>
23 #if defined CONFIG_X86_64
24 #include <asm/uv/bios.h>
25 #include <asm/uv/uv_irq.h>
26 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
27 #include <asm/sn/intr.h>
28 #include <asm/sn/sn_sal.h>
29 #endif
30 #include "../sgi-gru/gru.h"
31 #include "../sgi-gru/grukservices.h"
32 #include "xpc.h"
33
34 #if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
35 struct uv_IO_APIC_route_entry {
36         __u64   vector          :  8,
37                 delivery_mode   :  3,
38                 dest_mode       :  1,
39                 delivery_status :  1,
40                 polarity        :  1,
41                 __reserved_1    :  1,
42                 trigger         :  1,
43                 mask            :  1,
44                 __reserved_2    : 15,
45                 dest            : 32;
46 };
47 #endif
48
/*
 * Local heartbeat; set by xpc_setup_rsvd_page_uv() to point at the
 * cached_heartbeat slot of our own entry in xpc_partitions[].
 */
static struct xpc_heartbeat_uv *xpc_heartbeat_uv;

/*
 * Activate message queue: one GRU cache line per message, sized for four
 * messages per possible partition.
 */
#define XPC_ACTIVATE_MSG_SIZE_UV        (1 * GRU_CACHE_LINE_BYTES)
#define XPC_ACTIVATE_MQ_SIZE_UV         (4 * XP_MAX_NPARTITIONS_UV * \
                                         XPC_ACTIVATE_MSG_SIZE_UV)
#define XPC_ACTIVATE_IRQ_NAME           "xpc_activate"

/*
 * Notify message queue: two GRU cache lines per message, sized for four
 * messages per possible partition.
 */
#define XPC_NOTIFY_MSG_SIZE_UV          (2 * GRU_CACHE_LINE_BYTES)
#define XPC_NOTIFY_MQ_SIZE_UV           (4 * XP_MAX_NPARTITIONS_UV * \
                                         XPC_NOTIFY_MSG_SIZE_UV)
#define XPC_NOTIFY_IRQ_NAME             "xpc_notify"

/* The two GRU message queues used by this file. */
static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
63
64 static int
65 xpc_setup_partitions_uv(void)
66 {
67         short partid;
68         struct xpc_partition_uv *part_uv;
69
70         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
71                 part_uv = &xpc_partitions[partid].sn.uv;
72
73                 mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
74                 spin_lock_init(&part_uv->flags_lock);
75                 part_uv->remote_act_state = XPC_P_AS_INACTIVE;
76         }
77         return 0;
78 }
79
80 static void
81 xpc_teardown_partitions_uv(void)
82 {
83         short partid;
84         struct xpc_partition_uv *part_uv;
85         unsigned long irq_flags;
86
87         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
88                 part_uv = &xpc_partitions[partid].sn.uv;
89
90                 if (part_uv->cached_activate_gru_mq_desc != NULL) {
91                         mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
92                         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
93                         part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
94                         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
95                         kfree(part_uv->cached_activate_gru_mq_desc);
96                         part_uv->cached_activate_gru_mq_desc = NULL;
97                         mutex_unlock(&part_uv->
98                                      cached_activate_gru_mq_desc_mutex);
99                 }
100         }
101 }
102
103 static int
104 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
105 {
106         int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
107
108 #if defined CONFIG_X86_64
109         mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
110                         UV_AFFINITY_CPU);
111         if (mq->irq < 0) {
112                 dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
113                         -mq->irq);
114                 return mq->irq;
115         }
116
117         mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
118
119 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
120         if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
121                 mq->irq = SGI_XPC_ACTIVATE;
122         else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
123                 mq->irq = SGI_XPC_NOTIFY;
124         else
125                 return -EINVAL;
126
127         mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
128         uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value);
129 #else
130         #error not a supported configuration
131 #endif
132
133         return 0;
134 }
135
136 static void
137 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
138 {
139 #if defined CONFIG_X86_64
140         uv_teardown_irq(mq->irq);
141
142 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
143         int mmr_pnode;
144         unsigned long mmr_value;
145
146         mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
147         mmr_value = 1UL << 16;
148
149         uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
150 #else
151         #error not a supported configuration
152 #endif
153 }
154
155 static int
156 xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
157 {
158         int ret;
159
160 #if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
161         int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
162
163         ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address),
164                                     mq->order, &mq->mmr_offset);
165         if (ret < 0) {
166                 dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
167                         ret);
168                 return -EBUSY;
169         }
170 #elif defined CONFIG_X86_64
171         ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
172                                          mq->order, &mq->mmr_offset);
173         if (ret < 0) {
174                 dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
175                         "ret=%d\n", ret);
176                 return ret;
177         }
178 #else
179         #error not a supported configuration
180 #endif
181
182         mq->watchlist_num = ret;
183         return 0;
184 }
185
186 static void
187 xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
188 {
189         int ret;
190         int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
191
192 #if defined CONFIG_X86_64
193         ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
194         BUG_ON(ret != BIOS_STATUS_SUCCESS);
195 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
196         ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
197         BUG_ON(ret != SALRET_OK);
198 #else
199         #error not a supported configuration
200 #endif
201 }
202
203 static struct xpc_gru_mq_uv *
204 xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
205                      irq_handler_t irq_handler)
206 {
207         enum xp_retval xp_ret;
208         int ret;
209         int nid;
210         int nasid;
211         int pg_order;
212         struct page *page;
213         struct xpc_gru_mq_uv *mq;
214         struct uv_IO_APIC_route_entry *mmr_value;
215
216         mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
217         if (mq == NULL) {
218                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
219                         "a xpc_gru_mq_uv structure\n");
220                 ret = -ENOMEM;
221                 goto out_0;
222         }
223
224         mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
225                                   GFP_KERNEL);
226         if (mq->gru_mq_desc == NULL) {
227                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
228                         "a gru_message_queue_desc structure\n");
229                 ret = -ENOMEM;
230                 goto out_1;
231         }
232
233         pg_order = get_order(mq_size);
234         mq->order = pg_order + PAGE_SHIFT;
235         mq_size = 1UL << mq->order;
236
237         mq->mmr_blade = uv_cpu_to_blade_id(cpu);
238
239         nid = cpu_to_node(cpu);
240         page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
241                                 pg_order);
242         if (page == NULL) {
243                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
244                         "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
245                 ret = -ENOMEM;
246                 goto out_2;
247         }
248         mq->address = page_address(page);
249
250         /* enable generation of irq when GRU mq operation occurs to this mq */
251         ret = xpc_gru_mq_watchlist_alloc_uv(mq);
252         if (ret != 0)
253                 goto out_3;
254
255         ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
256         if (ret != 0)
257                 goto out_4;
258
259         ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
260         if (ret != 0) {
261                 dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
262                         mq->irq, -ret);
263                 goto out_5;
264         }
265
266         nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
267
268         mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
269         ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
270                                      nasid, mmr_value->vector, mmr_value->dest);
271         if (ret != 0) {
272                 dev_err(xpc_part, "gru_create_message_queue() returned "
273                         "error=%d\n", ret);
274                 ret = -EINVAL;
275                 goto out_6;
276         }
277
278         /* allow other partitions to access this GRU mq */
279         xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
280         if (xp_ret != xpSuccess) {
281                 ret = -EACCES;
282                 goto out_6;
283         }
284
285         return mq;
286
287         /* something went wrong */
288 out_6:
289         free_irq(mq->irq, NULL);
290 out_5:
291         xpc_release_gru_mq_irq_uv(mq);
292 out_4:
293         xpc_gru_mq_watchlist_free_uv(mq);
294 out_3:
295         free_pages((unsigned long)mq->address, pg_order);
296 out_2:
297         kfree(mq->gru_mq_desc);
298 out_1:
299         kfree(mq);
300 out_0:
301         return ERR_PTR(ret);
302 }
303
304 static void
305 xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
306 {
307         unsigned int mq_size;
308         int pg_order;
309         int ret;
310
311         /* disallow other partitions to access GRU mq */
312         mq_size = 1UL << mq->order;
313         ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
314         BUG_ON(ret != xpSuccess);
315
316         /* unregister irq handler and release mq irq/vector mapping */
317         free_irq(mq->irq, NULL);
318         xpc_release_gru_mq_irq_uv(mq);
319
320         /* disable generation of irq when GRU mq op occurs to this mq */
321         xpc_gru_mq_watchlist_free_uv(mq);
322
323         pg_order = mq->order - PAGE_SHIFT;
324         free_pages((unsigned long)mq->address, pg_order);
325
326         kfree(mq);
327 }
328
329 static enum xp_retval
330 xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
331                  size_t msg_size)
332 {
333         enum xp_retval xp_ret;
334         int ret;
335
336         while (1) {
337                 ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
338                 if (ret == MQE_OK) {
339                         xp_ret = xpSuccess;
340                         break;
341                 }
342
343                 if (ret == MQE_QUEUE_FULL) {
344                         dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
345                                 "error=MQE_QUEUE_FULL\n");
346                         /* !!! handle QLimit reached; delay & try again */
347                         /* ??? Do we add a limit to the number of retries? */
348                         (void)msleep_interruptible(10);
349                 } else if (ret == MQE_CONGESTION) {
350                         dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
351                                 "error=MQE_CONGESTION\n");
352                         /* !!! handle LB Overflow; simply try again */
353                         /* ??? Do we add a limit to the number of retries? */
354                 } else {
355                         /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
356                         dev_err(xpc_chan, "gru_send_message_gpa() returned "
357                                 "error=%d\n", ret);
358                         xp_ret = xpGruSendMqError;
359                         break;
360                 }
361         }
362         return xp_ret;
363 }
364
365 static void
366 xpc_process_activate_IRQ_rcvd_uv(void)
367 {
368         unsigned long irq_flags;
369         short partid;
370         struct xpc_partition *part;
371         u8 act_state_req;
372
373         DBUG_ON(xpc_activate_IRQ_rcvd == 0);
374
375         spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
376         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
377                 part = &xpc_partitions[partid];
378
379                 if (part->sn.uv.act_state_req == 0)
380                         continue;
381
382                 xpc_activate_IRQ_rcvd--;
383                 BUG_ON(xpc_activate_IRQ_rcvd < 0);
384
385                 act_state_req = part->sn.uv.act_state_req;
386                 part->sn.uv.act_state_req = 0;
387                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
388
389                 if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
390                         if (part->act_state == XPC_P_AS_INACTIVE)
391                                 xpc_activate_partition(part);
392                         else if (part->act_state == XPC_P_AS_DEACTIVATING)
393                                 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
394
395                 } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
396                         if (part->act_state == XPC_P_AS_INACTIVE)
397                                 xpc_activate_partition(part);
398                         else
399                                 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
400
401                 } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
402                         XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
403
404                 } else {
405                         BUG();
406                 }
407
408                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
409                 if (xpc_activate_IRQ_rcvd == 0)
410                         break;
411         }
412         spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
413
414 }
415
416 static void
417 xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
418                               struct xpc_activate_mq_msghdr_uv *msg_hdr,
419                               int *wakeup_hb_checker)
420 {
421         unsigned long irq_flags;
422         struct xpc_partition_uv *part_uv = &part->sn.uv;
423         struct xpc_openclose_args *args;
424
425         part_uv->remote_act_state = msg_hdr->act_state;
426
427         switch (msg_hdr->type) {
428         case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
429                 /* syncing of remote_act_state was just done above */
430                 break;
431
432         case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
433                 struct xpc_activate_mq_msg_activate_req_uv *msg;
434
435                 /*
436                  * ??? Do we deal here with ts_jiffies being different
437                  * ??? if act_state != XPC_P_AS_INACTIVE instead of
438                  * ??? below?
439                  */
440                 msg = container_of(msg_hdr, struct
441                                    xpc_activate_mq_msg_activate_req_uv, hdr);
442
443                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
444                 if (part_uv->act_state_req == 0)
445                         xpc_activate_IRQ_rcvd++;
446                 part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
447                 part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
448                 part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
449                 part_uv->heartbeat_gpa = msg->heartbeat_gpa;
450
451                 if (msg->activate_gru_mq_desc_gpa !=
452                     part_uv->activate_gru_mq_desc_gpa) {
453                         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
454                         part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
455                         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
456                         part_uv->activate_gru_mq_desc_gpa =
457                             msg->activate_gru_mq_desc_gpa;
458                 }
459                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
460
461                 (*wakeup_hb_checker)++;
462                 break;
463         }
464         case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
465                 struct xpc_activate_mq_msg_deactivate_req_uv *msg;
466
467                 msg = container_of(msg_hdr, struct
468                                    xpc_activate_mq_msg_deactivate_req_uv, hdr);
469
470                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
471                 if (part_uv->act_state_req == 0)
472                         xpc_activate_IRQ_rcvd++;
473                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
474                 part_uv->reason = msg->reason;
475                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
476
477                 (*wakeup_hb_checker)++;
478                 return;
479         }
480         case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
481                 struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
482
483                 msg = container_of(msg_hdr, struct
484                                    xpc_activate_mq_msg_chctl_closerequest_uv,
485                                    hdr);
486                 args = &part->remote_openclose_args[msg->ch_number];
487                 args->reason = msg->reason;
488
489                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
490                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
491                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
492
493                 xpc_wakeup_channel_mgr(part);
494                 break;
495         }
496         case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
497                 struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
498
499                 msg = container_of(msg_hdr, struct
500                                    xpc_activate_mq_msg_chctl_closereply_uv,
501                                    hdr);
502
503                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
504                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
505                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
506
507                 xpc_wakeup_channel_mgr(part);
508                 break;
509         }
510         case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
511                 struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
512
513                 msg = container_of(msg_hdr, struct
514                                    xpc_activate_mq_msg_chctl_openrequest_uv,
515                                    hdr);
516                 args = &part->remote_openclose_args[msg->ch_number];
517                 args->entry_size = msg->entry_size;
518                 args->local_nentries = msg->local_nentries;
519
520                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
521                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
522                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
523
524                 xpc_wakeup_channel_mgr(part);
525                 break;
526         }
527         case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
528                 struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
529
530                 msg = container_of(msg_hdr, struct
531                                    xpc_activate_mq_msg_chctl_openreply_uv, hdr);
532                 args = &part->remote_openclose_args[msg->ch_number];
533                 args->remote_nentries = msg->remote_nentries;
534                 args->local_nentries = msg->local_nentries;
535                 args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
536
537                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
538                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
539                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
540
541                 xpc_wakeup_channel_mgr(part);
542                 break;
543         }
544         case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
545                 struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
546
547                 msg = container_of(msg_hdr, struct
548                                 xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
549                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
550                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
551                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
552
553                 xpc_wakeup_channel_mgr(part);
554         }
555         case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
556                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
557                 part_uv->flags |= XPC_P_ENGAGED_UV;
558                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
559                 break;
560
561         case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
562                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
563                 part_uv->flags &= ~XPC_P_ENGAGED_UV;
564                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
565                 break;
566
567         default:
568                 dev_err(xpc_part, "received unknown activate_mq msg type=%d "
569                         "from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
570
571                 /* get hb checker to deactivate from the remote partition */
572                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
573                 if (part_uv->act_state_req == 0)
574                         xpc_activate_IRQ_rcvd++;
575                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
576                 part_uv->reason = xpBadMsgType;
577                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
578
579                 (*wakeup_hb_checker)++;
580                 return;
581         }
582
583         if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
584             part->remote_rp_ts_jiffies != 0) {
585                 /*
586                  * ??? Does what we do here need to be sensitive to
587                  * ??? act_state or remote_act_state?
588                  */
589                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
590                 if (part_uv->act_state_req == 0)
591                         xpc_activate_IRQ_rcvd++;
592                 part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
593                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
594
595                 (*wakeup_hb_checker)++;
596         }
597 }
598
599 static irqreturn_t
600 xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
601 {
602         struct xpc_activate_mq_msghdr_uv *msg_hdr;
603         short partid;
604         struct xpc_partition *part;
605         int wakeup_hb_checker = 0;
606         int part_referenced;
607
608         while (1) {
609                 msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
610                 if (msg_hdr == NULL)
611                         break;
612
613                 partid = msg_hdr->partid;
614                 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
615                         dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
616                                 "received invalid partid=0x%x in message\n",
617                                 partid);
618                 } else {
619                         part = &xpc_partitions[partid];
620
621                         part_referenced = xpc_part_ref(part);
622                         xpc_handle_activate_mq_msg_uv(part, msg_hdr,
623                                                       &wakeup_hb_checker);
624                         if (part_referenced)
625                                 xpc_part_deref(part);
626                 }
627
628                 gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
629         }
630
631         if (wakeup_hb_checker)
632                 wake_up_interruptible(&xpc_activate_IRQ_wq);
633
634         return IRQ_HANDLED;
635 }
636
637 static enum xp_retval
638 xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
639                                 unsigned long gru_mq_desc_gpa)
640 {
641         enum xp_retval ret;
642
643         ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
644                                sizeof(struct gru_message_queue_desc));
645         if (ret == xpSuccess)
646                 gru_mq_desc->mq = NULL;
647
648         return ret;
649 }
650
651 static enum xp_retval
652 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
653                          int msg_type)
654 {
655         struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
656         struct xpc_partition_uv *part_uv = &part->sn.uv;
657         struct gru_message_queue_desc *gru_mq_desc;
658         unsigned long irq_flags;
659         enum xp_retval ret;
660
661         DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
662
663         msg_hdr->type = msg_type;
664         msg_hdr->partid = xp_partition_id;
665         msg_hdr->act_state = part->act_state;
666         msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
667
668         mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
669 again:
670         if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
671                 gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
672                 if (gru_mq_desc == NULL) {
673                         gru_mq_desc = kmalloc(sizeof(struct
674                                               gru_message_queue_desc),
675                                               GFP_KERNEL);
676                         if (gru_mq_desc == NULL) {
677                                 ret = xpNoMemory;
678                                 goto done;
679                         }
680                         part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
681                 }
682
683                 ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
684                                                       part_uv->
685                                                       activate_gru_mq_desc_gpa);
686                 if (ret != xpSuccess)
687                         goto done;
688
689                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
690                 part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
691                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
692         }
693
694         /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
695         ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
696                                msg_size);
697         if (ret != xpSuccess) {
698                 smp_rmb();      /* ensure a fresh copy of part_uv->flags */
699                 if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
700                         goto again;
701         }
702 done:
703         mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
704         return ret;
705 }
706
707 static void
708 xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
709                               size_t msg_size, int msg_type)
710 {
711         enum xp_retval ret;
712
713         ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
714         if (unlikely(ret != xpSuccess))
715                 XPC_DEACTIVATE_PARTITION(part, ret);
716 }
717
718 static void
719 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
720                          void *msg, size_t msg_size, int msg_type)
721 {
722         struct xpc_partition *part = &xpc_partitions[ch->partid];
723         enum xp_retval ret;
724
725         ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
726         if (unlikely(ret != xpSuccess)) {
727                 if (irq_flags != NULL)
728                         spin_unlock_irqrestore(&ch->lock, *irq_flags);
729
730                 XPC_DEACTIVATE_PARTITION(part, ret);
731
732                 if (irq_flags != NULL)
733                         spin_lock_irqsave(&ch->lock, *irq_flags);
734         }
735 }
736
737 static void
738 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
739 {
740         unsigned long irq_flags;
741         struct xpc_partition_uv *part_uv = &part->sn.uv;
742
743         /*
744          * !!! Make our side think that the remote partition sent an activate
745          * !!! mq message our way by doing what the activate IRQ handler would
746          * !!! do had one really been sent.
747          */
748
749         spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
750         if (part_uv->act_state_req == 0)
751                 xpc_activate_IRQ_rcvd++;
752         part_uv->act_state_req = act_state_req;
753         spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
754
755         wake_up_interruptible(&xpc_activate_IRQ_wq);
756 }
757
758 static enum xp_retval
759 xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
760                                   size_t *len)
761 {
762         s64 status;
763         enum xp_retval ret;
764
765 #if defined CONFIG_X86_64
766         status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
767                                           (u64 *)len);
768         if (status == BIOS_STATUS_SUCCESS)
769                 ret = xpSuccess;
770         else if (status == BIOS_STATUS_MORE_PASSES)
771                 ret = xpNeedMoreInfo;
772         else
773                 ret = xpBiosError;
774
775 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
776         status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
777         if (status == SALRET_OK)
778                 ret = xpSuccess;
779         else if (status == SALRET_MORE_PASSES)
780                 ret = xpNeedMoreInfo;
781         else
782                 ret = xpSalError;
783
784 #else
785         #error not a supported configuration
786 #endif
787
788         return ret;
789 }
790
791 static int
792 xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
793 {
794         xpc_heartbeat_uv =
795             &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
796         rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
797         rp->sn.uv.activate_gru_mq_desc_gpa =
798             uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
799         return 0;
800 }
801
/*
 * The allow_hb operation (see xpc_arch_ops_uv) has no work to do in this
 * implementation; partid is unused.
 */
static void
xpc_allow_hb_uv(short partid)
{
        /* nothing needs to be done */
        return;
}
806
/*
 * The disallow_hb operation (see xpc_arch_ops_uv) has no work to do in this
 * implementation; partid is unused.
 */
static void
xpc_disallow_hb_uv(short partid)
{
        /* nothing needs to be done */
        return;
}
811
/*
 * The disallow_all_hbs operation (see xpc_arch_ops_uv) has no work to do in
 * this implementation.
 */
static void
xpc_disallow_all_hbs_uv(void)
{
        /* nothing needs to be done */
        return;
}
816
817 static void
818 xpc_increment_heartbeat_uv(void)
819 {
820         xpc_heartbeat_uv->value++;
821 }
822
823 static void
824 xpc_offline_heartbeat_uv(void)
825 {
826         xpc_increment_heartbeat_uv();
827         xpc_heartbeat_uv->offline = 1;
828 }
829
830 static void
831 xpc_online_heartbeat_uv(void)
832 {
833         xpc_increment_heartbeat_uv();
834         xpc_heartbeat_uv->offline = 0;
835 }
836
837 static void
838 xpc_heartbeat_init_uv(void)
839 {
840         xpc_heartbeat_uv->value = 1;
841         xpc_heartbeat_uv->offline = 0;
842 }
843
/*
 * Heartbeat shutdown hook: simply takes our heartbeat offline.
 */
static void
xpc_heartbeat_exit_uv(void)
{
        xpc_offline_heartbeat_uv();
}
849
850 static enum xp_retval
851 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
852 {
853         struct xpc_partition_uv *part_uv = &part->sn.uv;
854         enum xp_retval ret;
855
856         ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
857                                part_uv->heartbeat_gpa,
858                                sizeof(struct xpc_heartbeat_uv));
859         if (ret != xpSuccess)
860                 return ret;
861
862         if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
863             !part_uv->cached_heartbeat.offline) {
864
865                 ret = xpNoHeartbeat;
866         } else {
867                 part->last_heartbeat = part_uv->cached_heartbeat.value;
868         }
869         return ret;
870 }
871
872 static void
873 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
874                                     unsigned long remote_rp_gpa, int nasid)
875 {
876         short partid = remote_rp->SAL_partid;
877         struct xpc_partition *part = &xpc_partitions[partid];
878         struct xpc_activate_mq_msg_activate_req_uv msg;
879
880         part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
881         part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
882         part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
883         part->sn.uv.activate_gru_mq_desc_gpa =
884             remote_rp->sn.uv.activate_gru_mq_desc_gpa;
885
886         /*
887          * ??? Is it a good idea to make this conditional on what is
888          * ??? potentially stale state information?
889          */
890         if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
891                 msg.rp_gpa = uv_gpa(xpc_rsvd_page);
892                 msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
893                 msg.activate_gru_mq_desc_gpa =
894                     xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
895                 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
896                                            XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
897         }
898
899         if (part->act_state == XPC_P_AS_INACTIVE)
900                 xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
901 }
902
903 static void
904 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
905 {
906         xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
907 }
908
909 static void
910 xpc_request_partition_deactivation_uv(struct xpc_partition *part)
911 {
912         struct xpc_activate_mq_msg_deactivate_req_uv msg;
913
914         /*
915          * ??? Is it a good idea to make this conditional on what is
916          * ??? potentially stale state information?
917          */
918         if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
919             part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
920
921                 msg.reason = part->reason;
922                 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
923                                          XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
924         }
925 }
926
/*
 * Cancelling a deactivation request requires no action in this
 * implementation; part is unused.
 */
static void
xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
{
        /* nothing needs to be done */
}
933
934 static void
935 xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
936 {
937         head->first = NULL;
938         head->last = NULL;
939         spin_lock_init(&head->lock);
940         head->n_entries = 0;
941 }
942
943 static void *
944 xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
945 {
946         unsigned long irq_flags;
947         struct xpc_fifo_entry_uv *first;
948
949         spin_lock_irqsave(&head->lock, irq_flags);
950         first = head->first;
951         if (head->first != NULL) {
952                 head->first = first->next;
953                 if (head->first == NULL)
954                         head->last = NULL;
955
956                 head->n_entries--;
957                 BUG_ON(head->n_entries < 0);
958
959                 first->next = NULL;
960         }
961         spin_unlock_irqrestore(&head->lock, irq_flags);
962         return first;
963 }
964
965 static void
966 xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
967                       struct xpc_fifo_entry_uv *last)
968 {
969         unsigned long irq_flags;
970
971         last->next = NULL;
972         spin_lock_irqsave(&head->lock, irq_flags);
973         if (head->last != NULL)
974                 head->last->next = last;
975         else
976                 head->first = last;
977         head->last = last;
978         head->n_entries++;
979         spin_unlock_irqrestore(&head->lock, irq_flags);
980 }
981
982 static int
983 xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
984 {
985         return head->n_entries;
986 }
987
988 /*
989  * Setup the channel structures that are uv specific.
990  */
991 static enum xp_retval
992 xpc_setup_ch_structures_uv(struct xpc_partition *part)
993 {
994         struct xpc_channel_uv *ch_uv;
995         int ch_number;
996
997         for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
998                 ch_uv = &part->channels[ch_number].sn.uv;
999
1000                 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1001                 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1002         }
1003
1004         return xpSuccess;
1005 }
1006
/*
 * Teardown the channel structures that are uv specific.
 *
 * There is nothing to undo here; part is unused.
 */
static void
xpc_teardown_ch_structures_uv(struct xpc_partition *part)
{
        /* nothing needs to be done */
}
1016
1017 static enum xp_retval
1018 xpc_make_first_contact_uv(struct xpc_partition *part)
1019 {
1020         struct xpc_activate_mq_msg_uv msg;
1021
1022         /*
1023          * We send a sync msg to get the remote partition's remote_act_state
1024          * updated to our current act_state which at this point should
1025          * be XPC_P_AS_ACTIVATING.
1026          */
1027         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1028                                       XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
1029
1030         while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
1031                  (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
1032
1033                 dev_dbg(xpc_part, "waiting to make first contact with "
1034                         "partition %d\n", XPC_PARTID(part));
1035
1036                 /* wait a 1/4 of a second or so */
1037                 (void)msleep_interruptible(250);
1038
1039                 if (part->act_state == XPC_P_AS_DEACTIVATING)
1040                         return part->reason;
1041         }
1042
1043         return xpSuccess;
1044 }
1045
1046 static u64
1047 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
1048 {
1049         unsigned long irq_flags;
1050         union xpc_channel_ctl_flags chctl;
1051
1052         spin_lock_irqsave(&part->chctl_lock, irq_flags);
1053         chctl = part->chctl;
1054         if (chctl.all_flags != 0)
1055                 part->chctl.all_flags = 0;
1056
1057         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1058         return chctl.all_flags;
1059 }
1060
1061 static enum xp_retval
1062 xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
1063 {
1064         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1065         struct xpc_send_msg_slot_uv *msg_slot;
1066         unsigned long irq_flags;
1067         int nentries;
1068         int entry;
1069         size_t nbytes;
1070
1071         for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1072                 nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
1073                 ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1074                 if (ch_uv->send_msg_slots == NULL)
1075                         continue;
1076
1077                 for (entry = 0; entry < nentries; entry++) {
1078                         msg_slot = &ch_uv->send_msg_slots[entry];
1079
1080                         msg_slot->msg_slot_number = entry;
1081                         xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
1082                                               &msg_slot->next);
1083                 }
1084
1085                 spin_lock_irqsave(&ch->lock, irq_flags);
1086                 if (nentries < ch->local_nentries)
1087                         ch->local_nentries = nentries;
1088                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1089                 return xpSuccess;
1090         }
1091
1092         return xpNoMemory;
1093 }
1094
1095 static enum xp_retval
1096 xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
1097 {
1098         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1099         struct xpc_notify_mq_msg_uv *msg_slot;
1100         unsigned long irq_flags;
1101         int nentries;
1102         int entry;
1103         size_t nbytes;
1104
1105         for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1106                 nbytes = nentries * ch->entry_size;
1107                 ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1108                 if (ch_uv->recv_msg_slots == NULL)
1109                         continue;
1110
1111                 for (entry = 0; entry < nentries; entry++) {
1112                         msg_slot = ch_uv->recv_msg_slots +
1113                             entry * ch->entry_size;
1114
1115                         msg_slot->hdr.msg_slot_number = entry;
1116                 }
1117
1118                 spin_lock_irqsave(&ch->lock, irq_flags);
1119                 if (nentries < ch->remote_nentries)
1120                         ch->remote_nentries = nentries;
1121                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1122                 return xpSuccess;
1123         }
1124
1125         return xpNoMemory;
1126 }
1127
1128 /*
1129  * Allocate msg_slots associated with the channel.
1130  */
1131 static enum xp_retval
1132 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
1133 {
1134         static enum xp_retval ret;
1135         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1136
1137         DBUG_ON(ch->flags & XPC_C_SETUP);
1138
1139         ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
1140                                                    gru_message_queue_desc),
1141                                                    GFP_KERNEL);
1142         if (ch_uv->cached_notify_gru_mq_desc == NULL)
1143                 return xpNoMemory;
1144
1145         ret = xpc_allocate_send_msg_slot_uv(ch);
1146         if (ret == xpSuccess) {
1147
1148                 ret = xpc_allocate_recv_msg_slot_uv(ch);
1149                 if (ret != xpSuccess) {
1150                         kfree(ch_uv->send_msg_slots);
1151                         xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1152                 }
1153         }
1154         return ret;
1155 }
1156
1157 /*
1158  * Free up msg_slots and clear other stuff that were setup for the specified
1159  * channel.
1160  */
1161 static void
1162 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
1163 {
1164         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1165
1166         DBUG_ON(!spin_is_locked(&ch->lock));
1167
1168         kfree(ch_uv->cached_notify_gru_mq_desc);
1169         ch_uv->cached_notify_gru_mq_desc = NULL;
1170
1171         if (ch->flags & XPC_C_SETUP) {
1172                 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1173                 kfree(ch_uv->send_msg_slots);
1174                 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1175                 kfree(ch_uv->recv_msg_slots);
1176         }
1177 }
1178
1179 static void
1180 xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1181 {
1182         struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
1183
1184         msg.ch_number = ch->number;
1185         msg.reason = ch->reason;
1186         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1187                                     XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
1188 }
1189
1190 static void
1191 xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1192 {
1193         struct xpc_activate_mq_msg_chctl_closereply_uv msg;
1194
1195         msg.ch_number = ch->number;
1196         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1197                                     XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
1198 }
1199
1200 static void
1201 xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1202 {
1203         struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
1204
1205         msg.ch_number = ch->number;
1206         msg.entry_size = ch->entry_size;
1207         msg.local_nentries = ch->local_nentries;
1208         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1209                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
1210 }
1211
1212 static void
1213 xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1214 {
1215         struct xpc_activate_mq_msg_chctl_openreply_uv msg;
1216
1217         msg.ch_number = ch->number;
1218         msg.local_nentries = ch->local_nentries;
1219         msg.remote_nentries = ch->remote_nentries;
1220         msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
1221         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1222                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
1223 }
1224
1225 static void
1226 xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1227 {
1228         struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
1229
1230         msg.ch_number = ch->number;
1231         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1232                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
1233 }
1234
1235 static void
1236 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
1237 {
1238         unsigned long irq_flags;
1239
1240         spin_lock_irqsave(&part->chctl_lock, irq_flags);
1241         part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
1242         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1243
1244         xpc_wakeup_channel_mgr(part);
1245 }
1246
1247 static enum xp_retval
1248 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
1249                                unsigned long gru_mq_desc_gpa)
1250 {
1251         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1252
1253         DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
1254         return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
1255                                                gru_mq_desc_gpa);
1256 }
1257
1258 static void
1259 xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
1260 {
1261         struct xpc_activate_mq_msg_uv msg;
1262
1263         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1264                                       XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
1265 }
1266
1267 static void
1268 xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
1269 {
1270         struct xpc_activate_mq_msg_uv msg;
1271
1272         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1273                                       XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
1274 }
1275
1276 static void
1277 xpc_assume_partition_disengaged_uv(short partid)
1278 {
1279         struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
1280         unsigned long irq_flags;
1281
1282         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
1283         part_uv->flags &= ~XPC_P_ENGAGED_UV;
1284         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
1285 }
1286
1287 static int
1288 xpc_partition_engaged_uv(short partid)
1289 {
1290         return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
1291 }
1292
1293 static int
1294 xpc_any_partition_engaged_uv(void)
1295 {
1296         struct xpc_partition_uv *part_uv;
1297         short partid;
1298
1299         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
1300                 part_uv = &xpc_partitions[partid].sn.uv;
1301                 if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
1302                         return 1;
1303         }
1304         return 0;
1305 }
1306
1307 static enum xp_retval
1308 xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1309                          struct xpc_send_msg_slot_uv **address_of_msg_slot)
1310 {
1311         enum xp_retval ret;
1312         struct xpc_send_msg_slot_uv *msg_slot;
1313         struct xpc_fifo_entry_uv *entry;
1314
1315         while (1) {
1316                 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1317                 if (entry != NULL)
1318                         break;
1319
1320                 if (flags & XPC_NOWAIT)
1321                         return xpNoWait;
1322
1323                 ret = xpc_allocate_msg_wait(ch);
1324                 if (ret != xpInterrupted && ret != xpTimeout)
1325                         return ret;
1326         }
1327
1328         msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1329         *address_of_msg_slot = msg_slot;
1330         return xpSuccess;
1331 }
1332
1333 static void
1334 xpc_free_msg_slot_uv(struct xpc_channel *ch,
1335                      struct xpc_send_msg_slot_uv *msg_slot)
1336 {
1337         xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1338
1339         /* wakeup anyone waiting for a free msg slot */
1340         if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1341                 wake_up(&ch->msg_allocate_wq);
1342 }
1343
1344 static void
1345 xpc_notify_sender_uv(struct xpc_channel *ch,
1346                      struct xpc_send_msg_slot_uv *msg_slot,
1347                      enum xp_retval reason)
1348 {
1349         xpc_notify_func func = msg_slot->func;
1350
1351         if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1352
1353                 atomic_dec(&ch->n_to_notify);
1354
1355                 dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1356                         "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1357                         msg_slot->msg_slot_number, ch->partid, ch->number);
1358
1359                 func(reason, ch->partid, ch->number, msg_slot->key);
1360
1361                 dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1362                         "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1363                         msg_slot->msg_slot_number, ch->partid, ch->number);
1364         }
1365 }
1366
1367 static void
1368 xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1369                             struct xpc_notify_mq_msg_uv *msg)
1370 {
1371         struct xpc_send_msg_slot_uv *msg_slot;
1372         int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1373
1374         msg_slot = &ch->sn.uv.send_msg_slots[entry];
1375
1376         BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1377         msg_slot->msg_slot_number += ch->local_nentries;
1378
1379         if (msg_slot->func != NULL)
1380                 xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1381
1382         xpc_free_msg_slot_uv(ch, msg_slot);
1383 }
1384
1385 static void
1386 xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1387                             struct xpc_notify_mq_msg_uv *msg)
1388 {
1389         struct xpc_partition_uv *part_uv = &part->sn.uv;
1390         struct xpc_channel *ch;
1391         struct xpc_channel_uv *ch_uv;
1392         struct xpc_notify_mq_msg_uv *msg_slot;
1393         unsigned long irq_flags;
1394         int ch_number = msg->hdr.ch_number;
1395
1396         if (unlikely(ch_number >= part->nchannels)) {
1397                 dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1398                         "channel number=0x%x in message from partid=%d\n",
1399                         ch_number, XPC_PARTID(part));
1400
1401                 /* get hb checker to deactivate from the remote partition */
1402                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1403                 if (part_uv->act_state_req == 0)
1404                         xpc_activate_IRQ_rcvd++;
1405                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1406                 part_uv->reason = xpBadChannelNumber;
1407                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1408
1409                 wake_up_interruptible(&xpc_activate_IRQ_wq);
1410                 return;
1411         }
1412
1413         ch = &part->channels[ch_number];
1414         xpc_msgqueue_ref(ch);
1415
1416         if (!(ch->flags & XPC_C_CONNECTED)) {
1417                 xpc_msgqueue_deref(ch);
1418                 return;
1419         }
1420
1421         /* see if we're really dealing with an ACK for a previously sent msg */
1422         if (msg->hdr.size == 0) {
1423                 xpc_handle_notify_mq_ack_uv(ch, msg);
1424                 xpc_msgqueue_deref(ch);
1425                 return;
1426         }
1427
1428         /* we're dealing with a normal message sent via the notify_mq */
1429         ch_uv = &ch->sn.uv;
1430
1431         msg_slot = ch_uv->recv_msg_slots +
1432             (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
1433
1434         BUG_ON(msg_slot->hdr.size != 0);
1435
1436         memcpy(msg_slot, msg, msg->hdr.size);
1437
1438         xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1439
1440         if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1441                 /*
1442                  * If there is an existing idle kthread get it to deliver
1443                  * the payload, otherwise we'll have to get the channel mgr
1444                  * for this partition to create a kthread to do the delivery.
1445                  */
1446                 if (atomic_read(&ch->kthreads_idle) > 0)
1447                         wake_up_nr(&ch->idle_wq, 1);
1448                 else
1449                         xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1450         }
1451         xpc_msgqueue_deref(ch);
1452 }
1453
1454 static irqreturn_t
1455 xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1456 {
1457         struct xpc_notify_mq_msg_uv *msg;
1458         short partid;
1459         struct xpc_partition *part;
1460
1461         while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
1462                NULL) {
1463
1464                 partid = msg->hdr.partid;
1465                 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1466                         dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1467                                 "invalid partid=0x%x in message\n", partid);
1468                 } else {
1469                         part = &xpc_partitions[partid];
1470
1471                         if (xpc_part_ref(part)) {
1472                                 xpc_handle_notify_mq_msg_uv(part, msg);
1473                                 xpc_part_deref(part);
1474                         }
1475                 }
1476
1477                 gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
1478         }
1479
1480         return IRQ_HANDLED;
1481 }
1482
1483 static int
1484 xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1485 {
1486         return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1487 }
1488
1489 static void
1490 xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1491 {
1492         struct xpc_channel *ch = &part->channels[ch_number];
1493         int ndeliverable_payloads;
1494
1495         xpc_msgqueue_ref(ch);
1496
1497         ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1498
1499         if (ndeliverable_payloads > 0 &&
1500             (ch->flags & XPC_C_CONNECTED) &&
1501             (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1502
1503                 xpc_activate_kthreads(ch, ndeliverable_payloads);
1504         }
1505
1506         xpc_msgqueue_deref(ch);
1507 }
1508
1509 static enum xp_retval
1510 xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
1511                     u16 payload_size, u8 notify_type, xpc_notify_func func,
1512                     void *key)
1513 {
1514         enum xp_retval ret = xpSuccess;
1515         struct xpc_send_msg_slot_uv *msg_slot = NULL;
1516         struct xpc_notify_mq_msg_uv *msg;
1517         u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
1518         size_t msg_size;
1519
1520         DBUG_ON(notify_type != XPC_N_CALL);
1521
1522         msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
1523         if (msg_size > ch->entry_size)
1524                 return xpPayloadTooBig;
1525
1526         xpc_msgqueue_ref(ch);
1527
1528         if (ch->flags & XPC_C_DISCONNECTING) {
1529                 ret = ch->reason;
1530                 goto out_1;
1531         }
1532         if (!(ch->flags & XPC_C_CONNECTED)) {
1533                 ret = xpNotConnected;
1534                 goto out_1;
1535         }
1536
1537         ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
1538         if (ret != xpSuccess)
1539                 goto out_1;
1540
1541         if (func != NULL) {
1542                 atomic_inc(&ch->n_to_notify);
1543
1544                 msg_slot->key = key;
1545                 smp_wmb(); /* a non-NULL func must hit memory after the key */
1546                 msg_slot->func = func;
1547
1548                 if (ch->flags & XPC_C_DISCONNECTING) {
1549                         ret = ch->reason;
1550                         goto out_2;
1551                 }
1552         }
1553
1554         msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
1555         msg->hdr.partid = xp_partition_id;
1556         msg->hdr.ch_number = ch->number;
1557         msg->hdr.size = msg_size;
1558         msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
1559         memcpy(&msg->payload, payload, payload_size);
1560
1561         ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1562                                msg_size);
1563         if (ret == xpSuccess)
1564                 goto out_1;
1565
1566         XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1567 out_2:
1568         if (func != NULL) {
1569                 /*
1570                  * Try to NULL the msg_slot's func field. If we fail, then
1571                  * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
1572                  * case we need to pretend we succeeded to send the message
1573                  * since the user will get a callout for the disconnect error
1574                  * by xpc_notify_senders_of_disconnect_uv(), and to also get an
1575                  * error returned here will confuse them. Additionally, since
1576                  * in this case the channel is being disconnected we don't need
1577                  * to put the the msg_slot back on the free list.
1578                  */
1579                 if (cmpxchg(&msg_slot->func, func, NULL) != func) {
1580                         ret = xpSuccess;
1581                         goto out_1;
1582                 }
1583
1584                 msg_slot->key = NULL;
1585                 atomic_dec(&ch->n_to_notify);
1586         }
1587         xpc_free_msg_slot_uv(ch, msg_slot);
1588 out_1:
1589         xpc_msgqueue_deref(ch);
1590         return ret;
1591 }
1592
1593 /*
1594  * Tell the callers of xpc_send_notify() that the status of their payloads
1595  * is unknown because the channel is now disconnecting.
1596  *
1597  * We don't worry about putting these msg_slots on the free list since the
1598  * msg_slots themselves are about to be kfree'd.
1599  */
1600 static void
1601 xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1602 {
1603         struct xpc_send_msg_slot_uv *msg_slot;
1604         int entry;
1605
1606         DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1607
1608         for (entry = 0; entry < ch->local_nentries; entry++) {
1609
1610                 if (atomic_read(&ch->n_to_notify) == 0)
1611                         break;
1612
1613                 msg_slot = &ch->sn.uv.send_msg_slots[entry];
1614                 if (msg_slot->func != NULL)
1615                         xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1616         }
1617 }
1618
1619 /*
1620  * Get the next deliverable message's payload.
1621  */
1622 static void *
1623 xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1624 {
1625         struct xpc_fifo_entry_uv *entry;
1626         struct xpc_notify_mq_msg_uv *msg;
1627         void *payload = NULL;
1628
1629         if (!(ch->flags & XPC_C_DISCONNECTING)) {
1630                 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1631                 if (entry != NULL) {
1632                         msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1633                                            hdr.u.next);
1634                         payload = &msg->payload;
1635                 }
1636         }
1637         return payload;
1638 }
1639
1640 static void
1641 xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1642 {
1643         struct xpc_notify_mq_msg_uv *msg;
1644         enum xp_retval ret;
1645
1646         msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1647
1648         /* return an ACK to the sender of this message */
1649
1650         msg->hdr.partid = xp_partition_id;
1651         msg->hdr.size = 0;      /* size of zero indicates this is an ACK */
1652
1653         ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1654                                sizeof(struct xpc_notify_mq_msghdr_uv));
1655         if (ret != xpSuccess)
1656                 XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1657 }
1658
1659 static struct xpc_arch_operations xpc_arch_ops_uv = {
1660         .setup_partitions = xpc_setup_partitions_uv,
1661         .teardown_partitions = xpc_teardown_partitions_uv,
1662         .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
1663         .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
1664         .setup_rsvd_page = xpc_setup_rsvd_page_uv,
1665
1666         .allow_hb = xpc_allow_hb_uv,
1667         .disallow_hb = xpc_disallow_hb_uv,
1668         .disallow_all_hbs = xpc_disallow_all_hbs_uv,
1669         .increment_heartbeat = xpc_increment_heartbeat_uv,
1670         .offline_heartbeat = xpc_offline_heartbeat_uv,
1671         .online_heartbeat = xpc_online_heartbeat_uv,
1672         .heartbeat_init = xpc_heartbeat_init_uv,
1673         .heartbeat_exit = xpc_heartbeat_exit_uv,
1674         .get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
1675
1676         .request_partition_activation =
1677                 xpc_request_partition_activation_uv,
1678         .request_partition_reactivation =
1679                 xpc_request_partition_reactivation_uv,
1680         .request_partition_deactivation =
1681                 xpc_request_partition_deactivation_uv,
1682         .cancel_partition_deactivation_request =
1683                 xpc_cancel_partition_deactivation_request_uv,
1684
1685         .setup_ch_structures = xpc_setup_ch_structures_uv,
1686         .teardown_ch_structures = xpc_teardown_ch_structures_uv,
1687
1688         .make_first_contact = xpc_make_first_contact_uv,
1689
1690         .get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
1691         .send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
1692         .send_chctl_closereply = xpc_send_chctl_closereply_uv,
1693         .send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
1694         .send_chctl_openreply = xpc_send_chctl_openreply_uv,
1695         .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
1696         .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
1697
1698         .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
1699
1700         .setup_msg_structures = xpc_setup_msg_structures_uv,
1701         .teardown_msg_structures = xpc_teardown_msg_structures_uv,
1702
1703         .indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
1704         .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
1705         .assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
1706         .partition_engaged = xpc_partition_engaged_uv,
1707         .any_partition_engaged = xpc_any_partition_engaged_uv,
1708
1709         .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
1710         .send_payload = xpc_send_payload_uv,
1711         .get_deliverable_payload = xpc_get_deliverable_payload_uv,
1712         .received_payload = xpc_received_payload_uv,
1713         .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
1714 };
1715
1716 int
1717 xpc_init_uv(void)
1718 {
1719         xpc_arch_ops = xpc_arch_ops_uv;
1720
1721         if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1722                 dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1723                         XPC_MSG_HDR_MAX_SIZE);
1724                 return -E2BIG;
1725         }
1726
1727         xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
1728                                                   XPC_ACTIVATE_IRQ_NAME,
1729                                                   xpc_handle_activate_IRQ_uv);
1730         if (IS_ERR(xpc_activate_mq_uv))
1731                 return PTR_ERR(xpc_activate_mq_uv);
1732
1733         xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
1734                                                 XPC_NOTIFY_IRQ_NAME,
1735                                                 xpc_handle_notify_IRQ_uv);
1736         if (IS_ERR(xpc_notify_mq_uv)) {
1737                 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1738                 return PTR_ERR(xpc_notify_mq_uv);
1739         }
1740
1741         return 0;
1742 }
1743
1744 void
1745 xpc_exit_uv(void)
1746 {
1747         xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1748         xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1749 }