Dean Nelson | 89eb8eb | 2005-03-23 19:50:00 -0700 | [diff] [blame] | 1 | /* |
| 2 | * This file is subject to the terms and conditions of the GNU General Public |
| 3 | * License. See the file "COPYING" in the main directory of this archive |
| 4 | * for more details. |
| 5 | * |
| 6 | * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. |
| 7 | */ |
| 8 | |
| 9 | |
| 10 | /* |
| 11 | * Cross Partition Communication (XPC) partition support. |
| 12 | * |
| 13 | * This is the part of XPC that detects the presence/absence of |
| 14 | * other partitions. It provides a heartbeat and monitors the |
| 15 | * heartbeats of other partitions. |
| 16 | * |
| 17 | */ |
| 18 | |
| 19 | |
| 20 | #include <linux/kernel.h> |
| 21 | #include <linux/sysctl.h> |
| 22 | #include <linux/cache.h> |
| 23 | #include <linux/mmzone.h> |
| 24 | #include <linux/nodemask.h> |
Jes Sorensen | 65ed0b3 | 2005-06-21 17:15:03 -0700 | [diff] [blame^] | 25 | #include <asm/uncached.h> |
Dean Nelson | 89eb8eb | 2005-03-23 19:50:00 -0700 | [diff] [blame] | 26 | #include <asm/sn/bte.h> |
| 27 | #include <asm/sn/intr.h> |
| 28 | #include <asm/sn/sn_sal.h> |
| 29 | #include <asm/sn/nodepda.h> |
| 30 | #include <asm/sn/addrs.h> |
| 31 | #include "xpc.h" |
| 32 | |
| 33 | |
/* XPC is exiting flag; polled by xpc_discovery() so it can stop early */
int xpc_exiting;


/*
 * SH_IPI_ACCESS shub register value(s) on startup. Saved by
 * xpc_allow_IPI_ops() and written back by xpc_restrict_IPI_ops().
 */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;


/* original protection values for each node (indexed by compact node id) */
u64 xpc_prot_vec[MAX_COMPACT_NODES];


/* this partition's reserved page */
struct xpc_rsvd_page *xpc_rsvd_page;

/* this partition's XPC variables (within the reserved page) */
struct xpc_vars *xpc_vars;
struct xpc_vars_part *xpc_vars_part;


/*
 * For performance reasons, each entry of xpc_partitions[] is cacheline
 * aligned. And xpc_partitions[] is padded with an additional entry at the
 * end so that the last legitimate entry doesn't share its cacheline with
 * another variable.
 */
struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];


/*
 * Generic buffer used to store a local copy of the remote partition's
 * reserved page or XPC variables.
 *
 * xpc_discovery runs only once and is a separate thread that is
 * very likely going to be processing in parallel with receiving
 * interrupts.
 */
char ____cacheline_aligned
		xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];


/* systune related variables */
int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
| 83 | |
| 84 | /* |
| 85 | * Given a nasid, get the physical address of the partition's reserved page |
| 86 | * for that nasid. This function returns 0 on any error. |
| 87 | */ |
| 88 | static u64 |
| 89 | xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) |
| 90 | { |
| 91 | bte_result_t bte_res; |
| 92 | s64 status; |
| 93 | u64 cookie = 0; |
| 94 | u64 rp_pa = nasid; /* seed with nasid */ |
| 95 | u64 len = 0; |
| 96 | |
| 97 | |
| 98 | while (1) { |
| 99 | |
| 100 | status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, |
| 101 | &len); |
| 102 | |
| 103 | dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" |
| 104 | "0x%016lx, address=0x%016lx, len=0x%016lx\n", |
| 105 | status, cookie, rp_pa, len); |
| 106 | |
| 107 | if (status != SALRET_MORE_PASSES) { |
| 108 | break; |
| 109 | } |
| 110 | |
| 111 | if (len > buf_size) { |
| 112 | dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len); |
| 113 | status = SALRET_ERROR; |
| 114 | break; |
| 115 | } |
| 116 | |
| 117 | bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size, |
| 118 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); |
| 119 | if (bte_res != BTE_SUCCESS) { |
| 120 | dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); |
| 121 | status = SALRET_ERROR; |
| 122 | break; |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | if (status != SALRET_OK) { |
| 127 | rp_pa = 0; |
| 128 | } |
| 129 | dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); |
| 130 | return rp_pa; |
| 131 | } |
| 132 | |
| 133 | |
| 134 | /* |
| 135 | * Fill the partition reserved page with the information needed by |
| 136 | * other partitions to discover we are alive and establish initial |
| 137 | * communications. |
| 138 | */ |
| 139 | struct xpc_rsvd_page * |
| 140 | xpc_rsvd_page_init(void) |
| 141 | { |
| 142 | struct xpc_rsvd_page *rp; |
| 143 | AMO_t *amos_page; |
| 144 | u64 rp_pa, next_cl, nasid_array = 0; |
| 145 | int i, ret; |
| 146 | |
| 147 | |
| 148 | /* get the local reserved page's address */ |
| 149 | |
| 150 | rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), |
| 151 | (u64) xpc_remote_copy_buffer, |
| 152 | XPC_RSVD_PAGE_ALIGNED_SIZE); |
| 153 | if (rp_pa == 0) { |
| 154 | dev_err(xpc_part, "SAL failed to locate the reserved page\n"); |
| 155 | return NULL; |
| 156 | } |
| 157 | rp = (struct xpc_rsvd_page *) __va(rp_pa); |
| 158 | |
| 159 | if (rp->partid != sn_partition_id) { |
| 160 | dev_err(xpc_part, "the reserved page's partid of %d should be " |
| 161 | "%d\n", rp->partid, sn_partition_id); |
| 162 | return NULL; |
| 163 | } |
| 164 | |
| 165 | rp->version = XPC_RP_VERSION; |
| 166 | |
| 167 | /* |
| 168 | * Place the XPC variables on the cache line following the |
| 169 | * reserved page structure. |
| 170 | */ |
| 171 | next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE; |
| 172 | xpc_vars = (struct xpc_vars *) next_cl; |
| 173 | |
| 174 | /* |
| 175 | * Before clearing xpc_vars, see if a page of AMOs had been previously |
| 176 | * allocated. If not we'll need to allocate one and set permissions |
| 177 | * so that cross-partition AMOs are allowed. |
| 178 | * |
| 179 | * The allocated AMO page needs MCA reporting to remain disabled after |
| 180 | * XPC has unloaded. To make this work, we keep a copy of the pointer |
| 181 | * to this page (i.e., amos_page) in the struct xpc_vars structure, |
| 182 | * which is pointed to by the reserved page, and re-use that saved copy |
| 183 | * on subsequent loads of XPC. This AMO page is never freed, and its |
| 184 | * memory protections are never restricted. |
| 185 | */ |
| 186 | if ((amos_page = xpc_vars->amos_page) == NULL) { |
Jes Sorensen | 65ed0b3 | 2005-06-21 17:15:03 -0700 | [diff] [blame^] | 187 | amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0)); |
Dean Nelson | 89eb8eb | 2005-03-23 19:50:00 -0700 | [diff] [blame] | 188 | if (amos_page == NULL) { |
| 189 | dev_err(xpc_part, "can't allocate page of AMOs\n"); |
| 190 | return NULL; |
| 191 | } |
| 192 | |
| 193 | /* |
| 194 | * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems |
| 195 | * when xpc_allow_IPI_ops() is called via xpc_hb_init(). |
| 196 | */ |
| 197 | if (!enable_shub_wars_1_1()) { |
| 198 | ret = sn_change_memprotect(ia64_tpa((u64) amos_page), |
| 199 | PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, |
| 200 | &nasid_array); |
| 201 | if (ret != 0) { |
| 202 | dev_err(xpc_part, "can't change memory " |
| 203 | "protections\n"); |
Jes Sorensen | 65ed0b3 | 2005-06-21 17:15:03 -0700 | [diff] [blame^] | 204 | uncached_free_page(__IA64_UNCACHED_OFFSET | |
| 205 | TO_PHYS((u64) amos_page)); |
Dean Nelson | 89eb8eb | 2005-03-23 19:50:00 -0700 | [diff] [blame] | 206 | return NULL; |
| 207 | } |
| 208 | } |
Dean Nelson | 3a7d555 | 2005-04-04 13:14:00 -0700 | [diff] [blame] | 209 | } else if (!IS_AMO_ADDRESS((u64) amos_page)) { |
| 210 | /* |
| 211 | * EFI's XPBOOT can also set amos_page in the reserved page, |
| 212 | * but it happens to leave it as an uncached physical address |
| 213 | * and we need it to be an uncached virtual, so we'll have to |
| 214 | * convert it. |
| 215 | */ |
| 216 | if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) { |
| 217 | dev_err(xpc_part, "previously used amos_page address " |
| 218 | "is bad = 0x%p\n", (void *) amos_page); |
| 219 | return NULL; |
| 220 | } |
| 221 | amos_page = (AMO_t *) TO_AMO((u64) amos_page); |
Dean Nelson | 89eb8eb | 2005-03-23 19:50:00 -0700 | [diff] [blame] | 222 | } |
| 223 | |
| 224 | memset(xpc_vars, 0, sizeof(struct xpc_vars)); |
| 225 | |
| 226 | /* |
| 227 | * Place the XPC per partition specific variables on the cache line |
| 228 | * following the XPC variables structure. |
| 229 | */ |
| 230 | next_cl += XPC_VARS_ALIGNED_SIZE; |
| 231 | memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) * |
| 232 | XP_MAX_PARTITIONS); |
| 233 | xpc_vars_part = (struct xpc_vars_part *) next_cl; |
| 234 | xpc_vars->vars_part_pa = __pa(next_cl); |
| 235 | |
| 236 | xpc_vars->version = XPC_V_VERSION; |
| 237 | xpc_vars->act_nasid = cpuid_to_nasid(0); |
| 238 | xpc_vars->act_phys_cpuid = cpu_physical_id(0); |
| 239 | xpc_vars->amos_page = amos_page; /* save for next load of XPC */ |
| 240 | |
| 241 | |
| 242 | /* |
| 243 | * Initialize the activation related AMO variables. |
| 244 | */ |
| 245 | xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS); |
| 246 | for (i = 1; i < XP_NASID_MASK_WORDS; i++) { |
| 247 | xpc_IPI_init(i + XP_MAX_PARTITIONS); |
| 248 | } |
| 249 | /* export AMO page's physical address to other partitions */ |
| 250 | xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); |
| 251 | |
| 252 | /* |
| 253 | * This signifies to the remote partition that our reserved |
| 254 | * page is initialized. |
| 255 | */ |
| 256 | (volatile u64) rp->vars_pa = __pa(xpc_vars); |
| 257 | |
| 258 | return rp; |
| 259 | } |
| 260 | |
| 261 | |
| 262 | /* |
| 263 | * Change protections to allow IPI operations (and AMO operations on |
| 264 | * Shub 1.1 systems). |
| 265 | */ |
| 266 | void |
| 267 | xpc_allow_IPI_ops(void) |
| 268 | { |
| 269 | int node; |
| 270 | int nasid; |
| 271 | |
| 272 | |
| 273 | // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. |
| 274 | |
| 275 | if (is_shub2()) { |
| 276 | xpc_sh2_IPI_access0 = |
| 277 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); |
| 278 | xpc_sh2_IPI_access1 = |
| 279 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); |
| 280 | xpc_sh2_IPI_access2 = |
| 281 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); |
| 282 | xpc_sh2_IPI_access3 = |
| 283 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); |
| 284 | |
| 285 | for_each_online_node(node) { |
| 286 | nasid = cnodeid_to_nasid(node); |
| 287 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), |
| 288 | -1UL); |
| 289 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), |
| 290 | -1UL); |
| 291 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), |
| 292 | -1UL); |
| 293 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), |
| 294 | -1UL); |
| 295 | } |
| 296 | |
| 297 | } else { |
| 298 | xpc_sh1_IPI_access = |
| 299 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); |
| 300 | |
| 301 | for_each_online_node(node) { |
| 302 | nasid = cnodeid_to_nasid(node); |
| 303 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), |
| 304 | -1UL); |
| 305 | |
| 306 | /* |
| 307 | * Since the BIST collides with memory operations on |
| 308 | * SHUB 1.1 sn_change_memprotect() cannot be used. |
| 309 | */ |
| 310 | if (enable_shub_wars_1_1()) { |
| 311 | /* open up everything */ |
| 312 | xpc_prot_vec[node] = (u64) HUB_L((u64 *) |
| 313 | GLOBAL_MMR_ADDR(nasid, |
| 314 | SH1_MD_DQLP_MMR_DIR_PRIVEC0)); |
| 315 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, |
| 316 | SH1_MD_DQLP_MMR_DIR_PRIVEC0), |
| 317 | -1UL); |
| 318 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, |
| 319 | SH1_MD_DQRP_MMR_DIR_PRIVEC0), |
| 320 | -1UL); |
| 321 | } |
| 322 | } |
| 323 | } |
| 324 | } |
| 325 | |
| 326 | |
| 327 | /* |
| 328 | * Restrict protections to disallow IPI operations (and AMO operations on |
| 329 | * Shub 1.1 systems). |
| 330 | */ |
| 331 | void |
| 332 | xpc_restrict_IPI_ops(void) |
| 333 | { |
| 334 | int node; |
| 335 | int nasid; |
| 336 | |
| 337 | |
| 338 | // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. |
| 339 | |
| 340 | if (is_shub2()) { |
| 341 | |
| 342 | for_each_online_node(node) { |
| 343 | nasid = cnodeid_to_nasid(node); |
| 344 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), |
| 345 | xpc_sh2_IPI_access0); |
| 346 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), |
| 347 | xpc_sh2_IPI_access1); |
| 348 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), |
| 349 | xpc_sh2_IPI_access2); |
| 350 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), |
| 351 | xpc_sh2_IPI_access3); |
| 352 | } |
| 353 | |
| 354 | } else { |
| 355 | |
| 356 | for_each_online_node(node) { |
| 357 | nasid = cnodeid_to_nasid(node); |
| 358 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), |
| 359 | xpc_sh1_IPI_access); |
| 360 | |
| 361 | if (enable_shub_wars_1_1()) { |
| 362 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, |
| 363 | SH1_MD_DQLP_MMR_DIR_PRIVEC0), |
| 364 | xpc_prot_vec[node]); |
| 365 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, |
| 366 | SH1_MD_DQRP_MMR_DIR_PRIVEC0), |
| 367 | xpc_prot_vec[node]); |
| 368 | } |
| 369 | } |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | |
| 374 | /* |
| 375 | * At periodic intervals, scan through all active partitions and ensure |
| 376 | * their heartbeat is still active. If not, the partition is deactivated. |
| 377 | */ |
| 378 | void |
| 379 | xpc_check_remote_hb(void) |
| 380 | { |
| 381 | struct xpc_vars *remote_vars; |
| 382 | struct xpc_partition *part; |
| 383 | partid_t partid; |
| 384 | bte_result_t bres; |
| 385 | |
| 386 | |
| 387 | remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; |
| 388 | |
| 389 | for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { |
| 390 | if (partid == sn_partition_id) { |
| 391 | continue; |
| 392 | } |
| 393 | |
| 394 | part = &xpc_partitions[partid]; |
| 395 | |
| 396 | if (part->act_state == XPC_P_INACTIVE || |
| 397 | part->act_state == XPC_P_DEACTIVATING) { |
| 398 | continue; |
| 399 | } |
| 400 | |
| 401 | /* pull the remote_hb cache line */ |
| 402 | bres = xp_bte_copy(part->remote_vars_pa, |
| 403 | ia64_tpa((u64) remote_vars), |
| 404 | XPC_VARS_ALIGNED_SIZE, |
| 405 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); |
| 406 | if (bres != BTE_SUCCESS) { |
| 407 | XPC_DEACTIVATE_PARTITION(part, |
| 408 | xpc_map_bte_errors(bres)); |
| 409 | continue; |
| 410 | } |
| 411 | |
| 412 | dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" |
| 413 | " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, |
| 414 | remote_vars->heartbeat, part->last_heartbeat, |
| 415 | remote_vars->kdb_status, |
| 416 | remote_vars->heartbeating_to_mask); |
| 417 | |
| 418 | if (((remote_vars->heartbeat == part->last_heartbeat) && |
| 419 | (remote_vars->kdb_status == 0)) || |
| 420 | !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { |
| 421 | |
| 422 | XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); |
| 423 | continue; |
| 424 | } |
| 425 | |
| 426 | part->last_heartbeat = remote_vars->heartbeat; |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | |
| 431 | /* |
| 432 | * Get a copy of the remote partition's rsvd page. |
| 433 | * |
| 434 | * remote_rp points to a buffer that is cacheline aligned for BTE copies and |
| 435 | * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE. |
| 436 | */ |
| 437 | static enum xpc_retval |
| 438 | xpc_get_remote_rp(int nasid, u64 *discovered_nasids, |
| 439 | struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) |
| 440 | { |
| 441 | int bres, i; |
| 442 | |
| 443 | |
| 444 | /* get the reserved page's physical address */ |
| 445 | |
| 446 | *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, |
| 447 | XPC_RSVD_PAGE_ALIGNED_SIZE); |
| 448 | if (*remote_rsvd_page_pa == 0) { |
| 449 | return xpcNoRsvdPageAddr; |
| 450 | } |
| 451 | |
| 452 | |
| 453 | /* pull over the reserved page structure */ |
| 454 | |
| 455 | bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), |
| 456 | XPC_RSVD_PAGE_ALIGNED_SIZE, |
| 457 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); |
| 458 | if (bres != BTE_SUCCESS) { |
| 459 | return xpc_map_bte_errors(bres); |
| 460 | } |
| 461 | |
| 462 | |
| 463 | if (discovered_nasids != NULL) { |
| 464 | for (i = 0; i < XP_NASID_MASK_WORDS; i++) { |
| 465 | discovered_nasids[i] |= remote_rp->part_nasids[i]; |
| 466 | } |
| 467 | } |
| 468 | |
| 469 | |
| 470 | /* check that the partid is for another partition */ |
| 471 | |
| 472 | if (remote_rp->partid < 1 || |
| 473 | remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { |
| 474 | return xpcInvalidPartid; |
| 475 | } |
| 476 | |
| 477 | if (remote_rp->partid == sn_partition_id) { |
| 478 | return xpcLocalPartid; |
| 479 | } |
| 480 | |
| 481 | |
| 482 | if (XPC_VERSION_MAJOR(remote_rp->version) != |
| 483 | XPC_VERSION_MAJOR(XPC_RP_VERSION)) { |
| 484 | return xpcBadVersion; |
| 485 | } |
| 486 | |
| 487 | return xpcSuccess; |
| 488 | } |
| 489 | |
| 490 | |
| 491 | /* |
| 492 | * Get a copy of the remote partition's XPC variables. |
| 493 | * |
| 494 | * remote_vars points to a buffer that is cacheline aligned for BTE copies and |
| 495 | * assumed to be of size XPC_VARS_ALIGNED_SIZE. |
| 496 | */ |
| 497 | static enum xpc_retval |
| 498 | xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) |
| 499 | { |
| 500 | int bres; |
| 501 | |
| 502 | |
| 503 | if (remote_vars_pa == 0) { |
| 504 | return xpcVarsNotSet; |
| 505 | } |
| 506 | |
| 507 | |
| 508 | /* pull over the cross partition variables */ |
| 509 | |
| 510 | bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars), |
| 511 | XPC_VARS_ALIGNED_SIZE, |
| 512 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); |
| 513 | if (bres != BTE_SUCCESS) { |
| 514 | return xpc_map_bte_errors(bres); |
| 515 | } |
| 516 | |
| 517 | if (XPC_VERSION_MAJOR(remote_vars->version) != |
| 518 | XPC_VERSION_MAJOR(XPC_V_VERSION)) { |
| 519 | return xpcBadVersion; |
| 520 | } |
| 521 | |
| 522 | return xpcSuccess; |
| 523 | } |
| 524 | |
| 525 | |
| 526 | /* |
| 527 | * Prior code has determine the nasid which generated an IPI. Inspect |
| 528 | * that nasid to determine if its partition needs to be activated or |
| 529 | * deactivated. |
| 530 | * |
| 531 | * A partition is consider "awaiting activation" if our partition |
| 532 | * flags indicate it is not active and it has a heartbeat. A |
| 533 | * partition is considered "awaiting deactivation" if our partition |
| 534 | * flags indicate it is active but it has no heartbeat or it is not |
| 535 | * sending its heartbeat to us. |
| 536 | * |
| 537 | * To determine the heartbeat, the remote nasid must have a properly |
| 538 | * initialized reserved page. |
| 539 | */ |
| 540 | static void |
| 541 | xpc_identify_act_IRQ_req(int nasid) |
| 542 | { |
| 543 | struct xpc_rsvd_page *remote_rp; |
| 544 | struct xpc_vars *remote_vars; |
| 545 | u64 remote_rsvd_page_pa; |
| 546 | u64 remote_vars_pa; |
| 547 | partid_t partid; |
| 548 | struct xpc_partition *part; |
| 549 | enum xpc_retval ret; |
| 550 | |
| 551 | |
| 552 | /* pull over the reserved page structure */ |
| 553 | |
| 554 | remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; |
| 555 | |
| 556 | ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); |
| 557 | if (ret != xpcSuccess) { |
| 558 | dev_warn(xpc_part, "unable to get reserved page from nasid %d, " |
| 559 | "which sent interrupt, reason=%d\n", nasid, ret); |
| 560 | return; |
| 561 | } |
| 562 | |
| 563 | remote_vars_pa = remote_rp->vars_pa; |
| 564 | partid = remote_rp->partid; |
| 565 | part = &xpc_partitions[partid]; |
| 566 | |
| 567 | |
| 568 | /* pull over the cross partition variables */ |
| 569 | |
| 570 | remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; |
| 571 | |
| 572 | ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); |
| 573 | if (ret != xpcSuccess) { |
| 574 | |
| 575 | dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " |
| 576 | "which sent interrupt, reason=%d\n", nasid, ret); |
| 577 | |
| 578 | XPC_DEACTIVATE_PARTITION(part, ret); |
| 579 | return; |
| 580 | } |
| 581 | |
| 582 | |
| 583 | part->act_IRQ_rcvd++; |
| 584 | |
| 585 | dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " |
| 586 | "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, |
| 587 | remote_vars->heartbeat, remote_vars->heartbeating_to_mask); |
| 588 | |
| 589 | |
| 590 | if (part->act_state == XPC_P_INACTIVE) { |
| 591 | |
| 592 | part->remote_rp_pa = remote_rsvd_page_pa; |
| 593 | dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", |
| 594 | part->remote_rp_pa); |
| 595 | |
| 596 | part->remote_vars_pa = remote_vars_pa; |
| 597 | dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", |
| 598 | part->remote_vars_pa); |
| 599 | |
| 600 | part->last_heartbeat = remote_vars->heartbeat; |
| 601 | dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", |
| 602 | part->last_heartbeat); |
| 603 | |
| 604 | part->remote_vars_part_pa = remote_vars->vars_part_pa; |
| 605 | dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", |
| 606 | part->remote_vars_part_pa); |
| 607 | |
| 608 | part->remote_act_nasid = remote_vars->act_nasid; |
| 609 | dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", |
| 610 | part->remote_act_nasid); |
| 611 | |
| 612 | part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; |
| 613 | dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", |
| 614 | part->remote_act_phys_cpuid); |
| 615 | |
| 616 | part->remote_amos_page_pa = remote_vars->amos_page_pa; |
| 617 | dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", |
| 618 | part->remote_amos_page_pa); |
| 619 | |
| 620 | xpc_activate_partition(part); |
| 621 | |
| 622 | } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || |
| 623 | !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { |
| 624 | |
| 625 | part->reactivate_nasid = nasid; |
| 626 | XPC_DEACTIVATE_PARTITION(part, xpcReactivating); |
| 627 | } |
| 628 | } |
| 629 | |
| 630 | |
| 631 | /* |
| 632 | * Loop through the activation AMO variables and process any bits |
| 633 | * which are set. Each bit indicates a nasid sending a partition |
| 634 | * activation or deactivation request. |
| 635 | * |
| 636 | * Return #of IRQs detected. |
| 637 | */ |
| 638 | int |
| 639 | xpc_identify_act_IRQ_sender(void) |
| 640 | { |
| 641 | int word, bit; |
| 642 | u64 nasid_mask; |
| 643 | u64 nasid; /* remote nasid */ |
| 644 | int n_IRQs_detected = 0; |
| 645 | AMO_t *act_amos; |
| 646 | struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; |
| 647 | |
| 648 | |
| 649 | act_amos = xpc_vars->act_amos; |
| 650 | |
| 651 | |
| 652 | /* scan through act AMO variable looking for non-zero entries */ |
| 653 | for (word = 0; word < XP_NASID_MASK_WORDS; word++) { |
| 654 | |
| 655 | nasid_mask = xpc_IPI_receive(&act_amos[word]); |
| 656 | if (nasid_mask == 0) { |
| 657 | /* no IRQs from nasids in this variable */ |
| 658 | continue; |
| 659 | } |
| 660 | |
| 661 | dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, |
| 662 | nasid_mask); |
| 663 | |
| 664 | |
| 665 | /* |
| 666 | * If this nasid has been added to the machine since |
| 667 | * our partition was reset, this will retain the |
| 668 | * remote nasid in our reserved pages machine mask. |
| 669 | * This is used in the event of module reload. |
| 670 | */ |
| 671 | rp->mach_nasids[word] |= nasid_mask; |
| 672 | |
| 673 | |
| 674 | /* locate the nasid(s) which sent interrupts */ |
| 675 | |
| 676 | for (bit = 0; bit < (8 * sizeof(u64)); bit++) { |
| 677 | if (nasid_mask & (1UL << bit)) { |
| 678 | n_IRQs_detected++; |
| 679 | nasid = XPC_NASID_FROM_W_B(word, bit); |
| 680 | dev_dbg(xpc_part, "interrupt from nasid %ld\n", |
| 681 | nasid); |
| 682 | xpc_identify_act_IRQ_req(nasid); |
| 683 | } |
| 684 | } |
| 685 | } |
| 686 | return n_IRQs_detected; |
| 687 | } |
| 688 | |
| 689 | |
| 690 | /* |
| 691 | * Mark specified partition as active. |
| 692 | */ |
| 693 | enum xpc_retval |
| 694 | xpc_mark_partition_active(struct xpc_partition *part) |
| 695 | { |
| 696 | unsigned long irq_flags; |
| 697 | enum xpc_retval ret; |
| 698 | |
| 699 | |
| 700 | dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); |
| 701 | |
| 702 | spin_lock_irqsave(&part->act_lock, irq_flags); |
| 703 | if (part->act_state == XPC_P_ACTIVATING) { |
| 704 | part->act_state = XPC_P_ACTIVE; |
| 705 | ret = xpcSuccess; |
| 706 | } else { |
| 707 | DBUG_ON(part->reason == xpcSuccess); |
| 708 | ret = part->reason; |
| 709 | } |
| 710 | spin_unlock_irqrestore(&part->act_lock, irq_flags); |
| 711 | |
| 712 | return ret; |
| 713 | } |
| 714 | |
| 715 | |
| 716 | /* |
| 717 | * Notify XPC that the partition is down. |
| 718 | */ |
| 719 | void |
| 720 | xpc_deactivate_partition(const int line, struct xpc_partition *part, |
| 721 | enum xpc_retval reason) |
| 722 | { |
| 723 | unsigned long irq_flags; |
| 724 | partid_t partid = XPC_PARTID(part); |
| 725 | |
| 726 | |
| 727 | spin_lock_irqsave(&part->act_lock, irq_flags); |
| 728 | |
| 729 | if (part->act_state == XPC_P_INACTIVE) { |
| 730 | XPC_SET_REASON(part, reason, line); |
| 731 | spin_unlock_irqrestore(&part->act_lock, irq_flags); |
| 732 | if (reason == xpcReactivating) { |
| 733 | /* we interrupt ourselves to reactivate partition */ |
| 734 | xpc_IPI_send_reactivate(part); |
| 735 | } |
| 736 | return; |
| 737 | } |
| 738 | if (part->act_state == XPC_P_DEACTIVATING) { |
| 739 | if ((part->reason == xpcUnloading && reason != xpcUnloading) || |
| 740 | reason == xpcReactivating) { |
| 741 | XPC_SET_REASON(part, reason, line); |
| 742 | } |
| 743 | spin_unlock_irqrestore(&part->act_lock, irq_flags); |
| 744 | return; |
| 745 | } |
| 746 | |
| 747 | part->act_state = XPC_P_DEACTIVATING; |
| 748 | XPC_SET_REASON(part, reason, line); |
| 749 | |
| 750 | spin_unlock_irqrestore(&part->act_lock, irq_flags); |
| 751 | |
| 752 | XPC_DISALLOW_HB(partid, xpc_vars); |
| 753 | |
| 754 | dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, |
| 755 | reason); |
| 756 | |
| 757 | xpc_partition_down(part, reason); |
| 758 | } |
| 759 | |
| 760 | |
| 761 | /* |
| 762 | * Mark specified partition as active. |
| 763 | */ |
| 764 | void |
| 765 | xpc_mark_partition_inactive(struct xpc_partition *part) |
| 766 | { |
| 767 | unsigned long irq_flags; |
| 768 | |
| 769 | |
| 770 | dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", |
| 771 | XPC_PARTID(part)); |
| 772 | |
| 773 | spin_lock_irqsave(&part->act_lock, irq_flags); |
| 774 | part->act_state = XPC_P_INACTIVE; |
| 775 | spin_unlock_irqrestore(&part->act_lock, irq_flags); |
| 776 | part->remote_rp_pa = 0; |
| 777 | } |
| 778 | |
| 779 | |
| 780 | /* |
| 781 | * SAL has provided a partition and machine mask. The partition mask |
| 782 | * contains a bit for each even nasid in our partition. The machine |
| 783 | * mask contains a bit for each even nasid in the entire machine. |
| 784 | * |
| 785 | * Using those two bit arrays, we can determine which nasids are |
| 786 | * known in the machine. Each should also have a reserved page |
| 787 | * initialized if they are available for partitioning. |
| 788 | */ |
| 789 | void |
| 790 | xpc_discovery(void) |
| 791 | { |
| 792 | void *remote_rp_base; |
| 793 | struct xpc_rsvd_page *remote_rp; |
| 794 | struct xpc_vars *remote_vars; |
| 795 | u64 remote_rsvd_page_pa; |
| 796 | u64 remote_vars_pa; |
| 797 | int region; |
| 798 | int max_regions; |
| 799 | int nasid; |
| 800 | struct xpc_rsvd_page *rp; |
| 801 | partid_t partid; |
| 802 | struct xpc_partition *part; |
| 803 | u64 *discovered_nasids; |
| 804 | enum xpc_retval ret; |
| 805 | |
| 806 | |
| 807 | remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE, |
| 808 | GFP_KERNEL, &remote_rp_base); |
| 809 | if (remote_rp == NULL) { |
| 810 | return; |
| 811 | } |
| 812 | remote_vars = (struct xpc_vars *) remote_rp; |
| 813 | |
| 814 | |
| 815 | discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS, |
| 816 | GFP_KERNEL); |
| 817 | if (discovered_nasids == NULL) { |
| 818 | kfree(remote_rp_base); |
| 819 | return; |
| 820 | } |
| 821 | memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS); |
| 822 | |
| 823 | rp = (struct xpc_rsvd_page *) xpc_rsvd_page; |
| 824 | |
| 825 | /* |
| 826 | * The term 'region' in this context refers to the minimum number of |
| 827 | * nodes that can comprise an access protection grouping. The access |
| 828 | * protection is in regards to memory, IOI and IPI. |
| 829 | */ |
| 830 | //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or |
| 831 | //>>> include/asm-ia64/sn/addrs.h |
| 832 | #define SH1_MAX_REGIONS 64 |
| 833 | #define SH2_MAX_REGIONS 256 |
| 834 | max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS; |
| 835 | |
| 836 | for (region = 0; region < max_regions; region++) { |
| 837 | |
| 838 | if ((volatile int) xpc_exiting) { |
| 839 | break; |
| 840 | } |
| 841 | |
| 842 | dev_dbg(xpc_part, "searching region %d\n", region); |
| 843 | |
| 844 | for (nasid = (region * sn_region_size * 2); |
| 845 | nasid < ((region + 1) * sn_region_size * 2); |
| 846 | nasid += 2) { |
| 847 | |
| 848 | if ((volatile int) xpc_exiting) { |
| 849 | break; |
| 850 | } |
| 851 | |
| 852 | dev_dbg(xpc_part, "checking nasid %d\n", nasid); |
| 853 | |
| 854 | |
| 855 | if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) { |
| 856 | dev_dbg(xpc_part, "PROM indicates Nasid %d is " |
| 857 | "part of the local partition; skipping " |
| 858 | "region\n", nasid); |
| 859 | break; |
| 860 | } |
| 861 | |
| 862 | if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) { |
| 863 | dev_dbg(xpc_part, "PROM indicates Nasid %d was " |
| 864 | "not on Numa-Link network at reset\n", |
| 865 | nasid); |
| 866 | continue; |
| 867 | } |
| 868 | |
| 869 | if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { |
| 870 | dev_dbg(xpc_part, "Nasid %d is part of a " |
| 871 | "partition which was previously " |
| 872 | "discovered\n", nasid); |
| 873 | continue; |
| 874 | } |
| 875 | |
| 876 | |
| 877 | /* pull over the reserved page structure */ |
| 878 | |
| 879 | ret = xpc_get_remote_rp(nasid, discovered_nasids, |
| 880 | remote_rp, &remote_rsvd_page_pa); |
| 881 | if (ret != xpcSuccess) { |
| 882 | dev_dbg(xpc_part, "unable to get reserved page " |
| 883 | "from nasid %d, reason=%d\n", nasid, |
| 884 | ret); |
| 885 | |
| 886 | if (ret == xpcLocalPartid) { |
| 887 | break; |
| 888 | } |
| 889 | continue; |
| 890 | } |
| 891 | |
| 892 | remote_vars_pa = remote_rp->vars_pa; |
| 893 | |
| 894 | partid = remote_rp->partid; |
| 895 | part = &xpc_partitions[partid]; |
| 896 | |
| 897 | |
| 898 | /* pull over the cross partition variables */ |
| 899 | |
| 900 | ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); |
| 901 | if (ret != xpcSuccess) { |
| 902 | dev_dbg(xpc_part, "unable to get XPC variables " |
| 903 | "from nasid %d, reason=%d\n", nasid, |
| 904 | ret); |
| 905 | |
| 906 | XPC_DEACTIVATE_PARTITION(part, ret); |
| 907 | continue; |
| 908 | } |
| 909 | |
| 910 | if (part->act_state != XPC_P_INACTIVE) { |
| 911 | dev_dbg(xpc_part, "partition %d on nasid %d is " |
| 912 | "already activating\n", partid, nasid); |
| 913 | break; |
| 914 | } |
| 915 | |
| 916 | /* |
| 917 | * Register the remote partition's AMOs with SAL so it |
| 918 | * can handle and cleanup errors within that address |
| 919 | * range should the remote partition go down. We don't |
| 920 | * unregister this range because it is difficult to |
| 921 | * tell when outstanding writes to the remote partition |
| 922 | * are finished and thus when it is thus safe to |
| 923 | * unregister. This should not result in wasted space |
| 924 | * in the SAL xp_addr_region table because we should |
| 925 | * get the same page for remote_act_amos_pa after |
| 926 | * module reloads and system reboots. |
| 927 | */ |
| 928 | if (sn_register_xp_addr_region( |
| 929 | remote_vars->amos_page_pa, |
| 930 | PAGE_SIZE, 1) < 0) { |
| 931 | dev_dbg(xpc_part, "partition %d failed to " |
| 932 | "register xp_addr region 0x%016lx\n", |
| 933 | partid, remote_vars->amos_page_pa); |
| 934 | |
| 935 | XPC_SET_REASON(part, xpcPhysAddrRegFailed, |
| 936 | __LINE__); |
| 937 | break; |
| 938 | } |
| 939 | |
| 940 | /* |
| 941 | * The remote nasid is valid and available. |
| 942 | * Send an interrupt to that nasid to notify |
| 943 | * it that we are ready to begin activation. |
| 944 | */ |
| 945 | dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " |
| 946 | "nasid %d, phys_cpuid 0x%x\n", |
| 947 | remote_vars->amos_page_pa, |
| 948 | remote_vars->act_nasid, |
| 949 | remote_vars->act_phys_cpuid); |
| 950 | |
| 951 | xpc_IPI_send_activate(remote_vars); |
| 952 | } |
| 953 | } |
| 954 | |
| 955 | kfree(discovered_nasids); |
| 956 | kfree(remote_rp_base); |
| 957 | } |
| 958 | |
| 959 | |
| 960 | /* |
| 961 | * Given a partid, get the nasids owned by that partition from the |
Dean Nelson | 3a7d555 | 2005-04-04 13:14:00 -0700 | [diff] [blame] | 962 | * remote partition's reserved page. |
Dean Nelson | 89eb8eb | 2005-03-23 19:50:00 -0700 | [diff] [blame] | 963 | */ |
| 964 | enum xpc_retval |
| 965 | xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) |
| 966 | { |
| 967 | struct xpc_partition *part; |
| 968 | u64 part_nasid_pa; |
| 969 | int bte_res; |
| 970 | |
| 971 | |
| 972 | part = &xpc_partitions[partid]; |
| 973 | if (part->remote_rp_pa == 0) { |
| 974 | return xpcPartitionDown; |
| 975 | } |
| 976 | |
| 977 | part_nasid_pa = part->remote_rp_pa + |
| 978 | (u64) &((struct xpc_rsvd_page *) 0)->part_nasids; |
| 979 | |
| 980 | bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask), |
| 981 | L1_CACHE_ALIGN(XP_NASID_MASK_BYTES), |
| 982 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); |
| 983 | |
| 984 | return xpc_map_bte_errors(bte_res); |
| 985 | } |
| 986 | |