i7core_edac: explicitly remove PCI devices from the devices list
[linux-2.6.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);	/* list of struct i7core_dev entries; searched by socket in get_i7core_dev() */
44 static DEFINE_MUTEX(i7core_edac_lock);	/* NOTE(review): presumably serializes access to the list above; its users are not visible here */
45 static int probed;	/* NOTE(review): looks like a "probe already ran" marker; confirm against the probe code */
46
47 /*
48  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
49  * registers start at bus 255, and are not reported by BIOS.
50  * So far only devices with 2 sockets have been found. To support more QPI
51  * (Quick Path Interconnect) sockets, just increment this number.
52  */
53 #define MAX_SOCKET_BUSES        2
54
55
56 /*
57  * Alter this version for the module when modifications are made
58  */
59 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
60 #define EDAC_MOD_STR      "i7core_edac"
61
/*
 * Logging helpers.
 *
 * Both macros forward to the corresponding EDAC printk wrapper and pass
 * "i7core" as the extra string argument, so callers only supply the level,
 * the format and its arguments (plus the mci for the second form).
 */
#define i7core_printk(level, fmt, ...)					\
	edac_printk(level, "i7core", fmt, ##__VA_ARGS__)

#define i7core_mc_printk(mci, level, fmt, ...)				\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##__VA_ARGS__)
70
/*
 * i7core Memory Controller Registers
 *
 * All values below are PCI configuration space offsets, grouped by the
 * device/function that exposes them, followed by the helpers that extract
 * fields from a value read at that offset.
 */

	/* Device 0 Function 0 */

#define MC_CFG_CONTROL		0x90

	/* Device 3 Function 0 */

#define MC_CONTROL		0x48
#define MC_STATUS		0x4c
#define MC_MAX_DOD		0x64

/*
 * Device 3 Function 4, as indicated on the Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Each DIMMn_COR_ERR() helper extracts one 15-bit field from the register
 * value it is listed under.
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)	((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)	(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)	((r) & 0x7fff)

/* Device 3 Function 2, as indicated on the Xeon 5500 datasheet */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

/* Upper (bits 30:16) and lower (bits 14:0) 15-bit fields of a counter value */
#define DIMM_TOP_COR_ERR(r)	(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)	((r) & 0x7fff)
107
108
109         /* OFFSETS for Devices 4,5 and 6 Function 0 */
110
/* Flag bits of the value read from MC_CHANNEL_DIMM_INIT_PARAMS */
#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

/*
 * MC_CHANNEL_MAPPER holds one 6-bit group per channel: the low 3 bits are
 * decoded by WRLCH() and the high 3 bits by RDLCH(); both return the field
 * value minus one.  Every macro argument is parenthesized so that an
 * expression such as "i + 1" can be passed as @ch safely.
 */
#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK		0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8

/* Bits of the value written to MC_CHANNEL_ERROR_INJECT */
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01
135
136         /* OFFSETS for Devices 4,5 and 6 Function 1 */
137
/*
 * Per-DIMM description registers (one per DIMM slot of a channel) and the
 * helpers that split a value read from them into its fields:
 *
 *   bits 12:10  rank offset
 *   bit  9      DIMM present
 *   bits 8:7    number-of-banks code   (decoded by numbank())
 *   bits 6:5    number-of-ranks code   (decoded by numrank())
 *   bits 4:2    number-of-rows code    (decoded by numrow())
 *   bits 1:0    number-of-columns code (decoded by numcol())
 *
 * All helpers parenthesize their argument so expressions can be passed.
 */
#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
153
#define MC_RANK_PRESENT		0x7c

/* Eight consecutive 32-bit SAG registers, 4 bytes apart */
#define MC_SAG_CH_0		0x80
#define MC_SAG_CH_1		0x84
#define MC_SAG_CH_2		0x88
#define MC_SAG_CH_3		0x8c
#define MC_SAG_CH_4		0x90
#define MC_SAG_CH_5		0x94
#define MC_SAG_CH_6		0x98
#define MC_SAG_CH_7		0x9c

/* Eight consecutive 32-bit RIR limit registers; the limit is the low 10 bits */
#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

/* RIR way register: rank in bits 2:0, offset in bits 13:3 */
#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7
178
179 /*
180  * i7core structs
181  */
182
183 #define NUM_CHANS 3	/* Channels handled per memory controller; sizes the per-channel arrays in struct i7core_pvt */
184 #define MAX_DIMMS 3             /* Max DIMMS per channel */
185 #define MAX_MCR_FUNC  4	/* pci_mcr[] in struct i7core_pvt has MAX_MCR_FUNC + 1 slots */
186 #define MAX_CHAN_FUNC 3	/* pci_ch[][] in struct i7core_pvt has MAX_CHAN_FUNC + 1 slots per channel */
187
188 struct i7core_info {
189         u32     mc_control;
190         u32     mc_status;
191         u32     max_dod;
192         u32     ch_map;
193 };
194
195
196 struct i7core_inject {
197         int     enable;
198
199         u32     section;
200         u32     type;
201         u32     eccmask;
202
203         /* Error address mask */
204         int channel, dimm, rank, bank, page, col;
205 };
206
207 struct i7core_channel {
208         u32             ranks;
209         u32             dimms;
210 };
211
/*
 * Describes one PCI function listed in a pci_dev_descr_* table.
 * The first three members are normally set through PCI_DESCR().
 */
struct pci_id_descr {
	int dev;	/* PCI device (slot) number */
	int func;	/* PCI function number */
	int dev_id;	/* PCI device id */
	int optional;	/* non-zero for entries flagged ".optional = 1" in the tables */
};
218
/* A descriptor array together with its element count (see PCI_ID_TABLE_ENTRY). */
struct pci_id_table {
	const struct pci_id_descr *descr;	/* first element of the descriptor array */
	int n_devs;				/* number of elements in @descr */
};
223
224 struct i7core_dev {
225         struct list_head        list;
226         u8                      socket;
227         struct pci_dev          **pdev;
228         int                     n_devs;
229         struct mem_ctl_info     *mci;
230 };
231
232 struct i7core_pvt {
233         struct pci_dev  *pci_noncore;
234         struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
235         struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
236
237         struct i7core_dev *i7core_dev;
238
239         struct i7core_info      info;
240         struct i7core_inject    inject;
241         struct i7core_channel   channel[NUM_CHANS];
242
243         int             channels; /* Number of active channels */
244
245         int             ce_count_available;
246         int             csrow_map[NUM_CHANS][MAX_DIMMS];
247
248                         /* ECC corrected errors counts per udimm */
249         unsigned long   udimm_ce_count[MAX_DIMMS];
250         int             udimm_last_ce_count[MAX_DIMMS];
251                         /* ECC corrected errors counts per rdimm */
252         unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
253         int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
254
255         unsigned int    is_registered;
256
257         /* mcelog glue */
258         struct edac_mce         edac_mce;
259
260         /* Fifo double buffers */
261         struct mce              mce_entry[MCE_LOG_LEN];
262         struct mce              mce_outentry[MCE_LOG_LEN];
263
264         /* Fifo in/out counters */
265         unsigned                mce_in, mce_out;
266
267         /* Count indicator to show errors not got */
268         unsigned                mce_overrun;
269
270         /* Struct to control EDAC polling */
271         struct edac_pci_ctl_info *i7core_pci;
272 };
273
/*
 * Expands to the designated initializers for the dev, func and dev_id
 * members of a struct pci_id_descr.  The caller supplies the surrounding
 * braces and may append further initializers such as ".optional = 1".
 */
#define PCI_DESCR(device, function, device_id)				\
	.dev = (device), .func = (function), .dev_id = (device_id)
278
279 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { /* one { device, function, PCI id } entry per function used on Nehalem parts */
280                 /* Memory controller */
281         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
282         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
283                         /* Exists only for RDIMM */
284         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
285         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
286
287                 /* Channel 0 */
288         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
289         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
290         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
291         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
292
293                 /* Channel 1 */
294         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
295         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
296         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
297         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
298
299                 /* Channel 2 */
300         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
301         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
302         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
303         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
304
305                 /* Generic Non-core registers */
306         /*
307          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
308          * On Xeon 55xx, however, it has a different id (8086:2c40). So,
309          * the probing code needs to test for the other id when looking
310          * up this one fails.
311          */
312         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
313
314 };
315
316 static const struct pci_id_descr pci_dev_descr_lynnfield[] = { /* Lynnfield list: devices 3, 4, 5 and 0 only (no device 6 and no 3.2 entry) */
317         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
318         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
319         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
320
321         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
322         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
323         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
324         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
325
326         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
327         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
328         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
329         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
330
331         /*
332          * This PCI device has an alternate address on some
333          * processors, like the Core i7 860
334          */
335         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
336 };
337
338 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { /* same device/function layout as the Nehalem list, using the *_REV2 PCI ids */
339                 /* Memory controller */
340         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
341         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
342                         /* Exists only for RDIMM */
343         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
344         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
345
346                 /* Channel 0 */
347         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
348         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
349         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
350         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
351
352                 /* Channel 1 */
353         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
354         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
355         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
356         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
357
358                 /* Channel 2 */
359         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
360         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
361         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
362         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
363
364                 /* Generic Non-core registers */
365         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
366
367 };
368
369 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
370 static const struct pci_id_table pci_dev_table[] = {
371         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
372         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
373         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
374 };
375
376 /*
377  *      pci_device_id   table for which devices we are looking for
378  */
379 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
380         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
381         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
382         {0,}                    /* 0 terminated list. */
383 };
384
385 /****************************************************************************
386                         Ancillary status routines
387  ****************************************************************************/
388
389         /* MC_CONTROL bits */
/*
 * Bit tests on the register values cached in pvt->info.  Each macro yields
 * a non-zero value when the bit is set.  @ch is parenthesized so that an
 * expression can be passed as the channel number.
 */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
396
397         /* MC_MAX_DOD read functions */
/* Decode a 2-bit DIMM count field: the encoded value is the count minus one. */
static inline int numdimms(u32 dimms)
{
	const u32 field = dimms & 0x3;

	return field + 1;
}
402
/*
 * Decode a 2-bit rank-count code (only the two low bits of @rank are used).
 * Codes 0, 1, 2 mean 1, 2 and 4 ranks; code 3 yields -EINVAL.
 */
static inline int numrank(u32 rank)
{
	switch (rank & 0x3) {
	case 0:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	default:
		return -EINVAL;
	}
}
409
/*
 * Decode a 2-bit bank-count code (only the two low bits of @bank are used).
 * Codes 0, 1, 2 mean 4, 8 and 16 banks; code 3 yields -EINVAL.
 */
static inline int numbank(u32 bank)
{
	switch (bank & 0x3) {
	case 0:
		return 4;
	case 1:
		return 8;
	case 2:
		return 16;
	default:
		return -EINVAL;
	}
}
416
/*
 * Decode a 3-bit row-count code (only the three low bits of @row are used).
 * Codes 0..4 mean 2^12 .. 2^16 rows; codes 5..7 yield -EINVAL.
 */
static inline int numrow(u32 row)
{
	const u32 code = row & 0x7;

	if (code > 4)
		return -EINVAL;

	return 1 << (12 + code);
}
426
/*
 * Decode a 2-bit column-count code (only the two low bits of @col are used).
 * Codes 0, 1, 2 mean 2^10, 2^11 and 2^12 columns; code 3 yields -EINVAL.
 *
 * The lookup table has exactly the four entries the 2-bit index can reach
 * and is const, since it is never written.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
434
435 static struct i7core_dev *get_i7core_dev(u8 socket)
436 {
437         struct i7core_dev *i7core_dev;
438
439         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
440                 if (i7core_dev->socket == socket)
441                         return i7core_dev;
442         }
443
444         return NULL;
445 }
446
447 /****************************************************************************
448                         Memory check routines
449  ****************************************************************************/
450 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
451                                           unsigned func)
452 {
453         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
454         int i;
455
456         if (!i7core_dev)
457                 return NULL;
458
459         for (i = 0; i < i7core_dev->n_devs; i++) {
460                 if (!i7core_dev->pdev[i])
461                         continue;
462
463                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
464                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
465                         return i7core_dev->pdev[i];
466                 }
467         }
468
469         return NULL;
470 }
471
472 /**
473  * i7core_get_active_channels() - gets the number of channels and csrows
474  * @socket:     Quick Path Interconnect socket
475  * @channels:   Number of channels that will be returned
476  * @csrows:     Number of csrows found
477  *
478  * Since EDAC core needs to know in advance the number of available channels
479  * and csrows, in order to allocate memory for csrows/channels, it is needed
480  * to run two similar steps. At the first step, implemented on this function,
481  * it checks the number of csrows/channels present at one socket.
482  * this is used in order to properly allocate the size of mci components.
483  *
484  * It should be noticed that none of the current available datasheets explain
485  * or even mention how csrows are seen by the memory controller. So, we need
486  * to add a fake description for csrows.
487  * So, this driver is attributing one DIMM memory for one csrow.
488  */
489 static int i7core_get_active_channels(const u8 socket, unsigned *channels,
490                                       unsigned *csrows)
491 {
492         struct pci_dev *pdev = NULL;
493         int i, j;
494         u32 status, control;
495
496         *channels = 0;
497         *csrows = 0;
498
499         pdev = get_pdev_slot_func(socket, 3, 0);
500         if (!pdev) {
501                 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
502                               socket);
503                 return -ENODEV;
504         }
505
506         /* Device 3 function 0 reads */
507         pci_read_config_dword(pdev, MC_STATUS, &status);
508         pci_read_config_dword(pdev, MC_CONTROL, &control);
509
510         for (i = 0; i < NUM_CHANS; i++) {
511                 u32 dimm_dod[3];
512                 /* Check if the channel is active */
513                 if (!(control & (1 << (8 + i))))
514                         continue;
515
516                 /* Check if the channel is disabled */
517                 if (status & (1 << i))
518                         continue;
519
520                 pdev = get_pdev_slot_func(socket, i + 4, 1);
521                 if (!pdev) {
522                         i7core_printk(KERN_ERR, "Couldn't find socket %d "
523                                                 "fn %d.%d!!!\n",
524                                                 socket, i + 4, 1);
525                         return -ENODEV;
526                 }
527                 /* Devices 4-6 function 1 */
528                 pci_read_config_dword(pdev,
529                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
530                 pci_read_config_dword(pdev,
531                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
532                 pci_read_config_dword(pdev,
533                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
534
535                 (*channels)++;
536
537                 for (j = 0; j < 3; j++) {
538                         if (!DIMM_PRESENT(dimm_dod[j]))
539                                 continue;
540                         (*csrows)++;
541                 }
542         }
543
544         debugf0("Number of active channels on socket %d: %d\n",
545                 socket, *channels);
546
547         return 0;
548 }
549
550 static int get_dimm_config(const struct mem_ctl_info *mci, int *csrow)
551 {
552         struct i7core_pvt *pvt = mci->pvt_info;
553         struct csrow_info *csr;
554         struct pci_dev *pdev;
555         int i, j;
556         unsigned long last_page = 0;
557         enum edac_type mode;
558         enum mem_type mtype;
559
560         /* Get data from the MC register, function 0 */
561         pdev = pvt->pci_mcr[0];
562         if (!pdev)
563                 return -ENODEV;
564
565         /* Device 3 function 0 reads */
566         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
567         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
568         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
569         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
570
571         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
572                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
573                 pvt->info.max_dod, pvt->info.ch_map);
574
575         if (ECC_ENABLED(pvt)) {
576                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
577                 if (ECCx8(pvt))
578                         mode = EDAC_S8ECD8ED;
579                 else
580                         mode = EDAC_S4ECD4ED;
581         } else {
582                 debugf0("ECC disabled\n");
583                 mode = EDAC_NONE;
584         }
585
586         /* FIXME: need to handle the error codes */
587         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
588                 "x%x x 0x%x\n",
589                 numdimms(pvt->info.max_dod),
590                 numrank(pvt->info.max_dod >> 2),
591                 numbank(pvt->info.max_dod >> 4),
592                 numrow(pvt->info.max_dod >> 6),
593                 numcol(pvt->info.max_dod >> 9));
594
595         for (i = 0; i < NUM_CHANS; i++) {
596                 u32 data, dimm_dod[3], value[8];
597
598                 if (!pvt->pci_ch[i][0])
599                         continue;
600
601                 if (!CH_ACTIVE(pvt, i)) {
602                         debugf0("Channel %i is not active\n", i);
603                         continue;
604                 }
605                 if (CH_DISABLED(pvt, i)) {
606                         debugf0("Channel %i is disabled\n", i);
607                         continue;
608                 }
609
610                 /* Devices 4-6 function 0 */
611                 pci_read_config_dword(pvt->pci_ch[i][0],
612                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
613
614                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
615                                                 4 : 2;
616
617                 if (data & REGISTERED_DIMM)
618                         mtype = MEM_RDDR3;
619                 else
620                         mtype = MEM_DDR3;
621 #if 0
622                 if (data & THREE_DIMMS_PRESENT)
623                         pvt->channel[i].dimms = 3;
624                 else if (data & SINGLE_QUAD_RANK_PRESENT)
625                         pvt->channel[i].dimms = 1;
626                 else
627                         pvt->channel[i].dimms = 2;
628 #endif
629
630                 /* Devices 4-6 function 1 */
631                 pci_read_config_dword(pvt->pci_ch[i][1],
632                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
633                 pci_read_config_dword(pvt->pci_ch[i][1],
634                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
635                 pci_read_config_dword(pvt->pci_ch[i][1],
636                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
637
638                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
639                         "%d ranks, %cDIMMs\n",
640                         i,
641                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
642                         data,
643                         pvt->channel[i].ranks,
644                         (data & REGISTERED_DIMM) ? 'R' : 'U');
645
646                 for (j = 0; j < 3; j++) {
647                         u32 banks, ranks, rows, cols;
648                         u32 size, npages;
649
650                         if (!DIMM_PRESENT(dimm_dod[j]))
651                                 continue;
652
653                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
654                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
655                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
656                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
657
658                         /* DDR3 has 8 I/O banks */
659                         size = (rows * cols * banks * ranks) >> (20 - 3);
660
661                         pvt->channel[i].dimms++;
662
663                         debugf0("\tdimm %d %d Mb offset: %x, "
664                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
665                                 j, size,
666                                 RANKOFFSET(dimm_dod[j]),
667                                 banks, ranks, rows, cols);
668
669                         npages = MiB_TO_PAGES(size);
670
671                         csr = &mci->csrows[*csrow];
672                         csr->first_page = last_page + 1;
673                         last_page += npages;
674                         csr->last_page = last_page;
675                         csr->nr_pages = npages;
676
677                         csr->page_mask = 0;
678                         csr->grain = 8;
679                         csr->csrow_idx = *csrow;
680                         csr->nr_channels = 1;
681
682                         csr->channels[0].chan_idx = i;
683                         csr->channels[0].ce_count = 0;
684
685                         pvt->csrow_map[i][j] = *csrow;
686
687                         switch (banks) {
688                         case 4:
689                                 csr->dtype = DEV_X4;
690                                 break;
691                         case 8:
692                                 csr->dtype = DEV_X8;
693                                 break;
694                         case 16:
695                                 csr->dtype = DEV_X16;
696                                 break;
697                         default:
698                                 csr->dtype = DEV_UNKNOWN;
699                         }
700
701                         csr->edac_mode = mode;
702                         csr->mtype = mtype;
703
704                         (*csrow)++;
705                 }
706
707                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
708                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
709                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
710                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
711                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
712                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
713                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
714                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
715                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
716                 for (j = 0; j < 8; j++)
717                         debugf1("\t\t%#x\t%#x\t%#x\n",
718                                 (value[j] >> 27) & 0x1,
719                                 (value[j] >> 24) & 0x7,
720                                 (value[j] && ((1 << 24) - 1)));
721         }
722
723         return 0;
724 }
725
726 /****************************************************************************
727                         Error insertion routines
728  ****************************************************************************/
729
730 /* The i7core has independent error injection features per channel.
731    However, to have a simpler code, we don't allow enabling error injection
732    on more than one channel.
733    Also, since a change at an inject parameter will be applied only at enable,
734    we're disabling error injection on all write calls to the sysfs nodes that
735    controls the error code injection.
736  */
737 static int disable_inject(const struct mem_ctl_info *mci)
738 {
739         struct i7core_pvt *pvt = mci->pvt_info;
740
741         pvt->inject.enable = 0;
742
743         if (!pvt->pci_ch[pvt->inject.channel][0])
744                 return -ENODEV;
745
746         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
747                                 MC_CHANNEL_ERROR_INJECT, 0);
748
749         return 0;
750 }
751
752 /*
753  * i7core inject inject.section
754  *
755  *      accept and store error injection inject.section value
756  *      bit 0 - refers to the lower 32-byte half cacheline
757  *      bit 1 - refers to the upper 32-byte half cacheline
758  */
759 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
760                                            const char *data, size_t count)
761 {
762         struct i7core_pvt *pvt = mci->pvt_info;
763         unsigned long value;
764         int rc;
765
766         if (pvt->inject.enable)
767                 disable_inject(mci);
768
769         rc = strict_strtoul(data, 10, &value);
770         if ((rc < 0) || (value > 3))
771                 return -EIO;
772
773         pvt->inject.section = (u32) value;
774         return count;
775 }
776
777 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
778                                               char *data)
779 {
780         struct i7core_pvt *pvt = mci->pvt_info;
781         return sprintf(data, "0x%08x\n", pvt->inject.section);
782 }
783
784 /*
785  * i7core inject.type
786  *
787  *      accept and store error injection inject.section value
788  *      bit 0 - repeat enable - Enable error repetition
789  *      bit 1 - inject ECC error
790  *      bit 2 - inject parity error
791  */
792 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
793                                         const char *data, size_t count)
794 {
795         struct i7core_pvt *pvt = mci->pvt_info;
796         unsigned long value;
797         int rc;
798
799         if (pvt->inject.enable)
800                 disable_inject(mci);
801
802         rc = strict_strtoul(data, 10, &value);
803         if ((rc < 0) || (value > 7))
804                 return -EIO;
805
806         pvt->inject.type = (u32) value;
807         return count;
808 }
809
810 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
811                                               char *data)
812 {
813         struct i7core_pvt *pvt = mci->pvt_info;
814         return sprintf(data, "0x%08x\n", pvt->inject.type);
815 }
816
817 /*
818  * i7core_inject_inject.eccmask_store
819  *
820  * The type of error (UE/CE) will depend on the inject.eccmask value:
821  *   Any bits set to a 1 will flip the corresponding ECC bit
822  *   Correctable errors can be injected by flipping 1 bit or the bits within
823  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
824  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
825  *   uncorrectable error to be injected.
826  */
827 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
828                                         const char *data, size_t count)
829 {
830         struct i7core_pvt *pvt = mci->pvt_info;
831         unsigned long value;
832         int rc;
833
834         if (pvt->inject.enable)
835                 disable_inject(mci);
836
837         rc = strict_strtoul(data, 10, &value);
838         if (rc < 0)
839                 return -EIO;
840
841         pvt->inject.eccmask = (u32) value;
842         return count;
843 }
844
845 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
846                                               char *data)
847 {
848         struct i7core_pvt *pvt = mci->pvt_info;
849         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
850 }
851
/*
 * i7core inject_addrmatch
 *
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs store/show pair for
 * one address-match criterion kept in pvt->inject.<param>:
 *
 *   store: disables any pending injection, then accepts either "any"
 *          (saved as -1) or a decimal number lower than 'limit'.
 *          Anything else is rejected with -EIO.
 *   show:  prints "any" when the stored value is negative, otherwise the
 *          stored value in decimal.
 */

#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        unsigned long parsed;                                   \
        long value;                                             \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                /*                                              \
                 * Parse into an unsigned long, as required by  \
                 * strict_strtoul(), and range-check it while   \
                 * still unsigned: a huge number must not wrap  \
                 * to a negative value and be taken as "any".   \
                 */                                             \
                if (strict_strtoul(data, 10, &parsed) < 0)      \
                        return -EIO;                            \
                if (parsed >= (limit))                          \
                        return -EIO;                            \
                value = parsed;                                 \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}
904
905 #define ATTR_ADDR_MATCH(param)                                  \
906         {                                                       \
907                 .attr = {                                       \
908                         .name = #param,                         \
909                         .mode = (S_IRUGO | S_IWUSR)             \
910                 },                                              \
911                 .show  = i7core_inject_show_##param,            \
912                 .store = i7core_inject_store_##param,           \
913         }
914
915 DECLARE_ADDR_MATCH(channel, 3);
916 DECLARE_ADDR_MATCH(dimm, 3);
917 DECLARE_ADDR_MATCH(rank, 4);
918 DECLARE_ADDR_MATCH(bank, 32);
919 DECLARE_ADDR_MATCH(page, 0x10000);
920 DECLARE_ADDR_MATCH(col, 0x4000);
921
922 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
923 {
924         u32 read;
925         int count;
926
927         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
928                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
929                 where, val);
930
931         for (count = 0; count < 10; count++) {
932                 if (count)
933                         msleep(100);
934                 pci_write_config_dword(dev, where, val);
935                 pci_read_config_dword(dev, where, &read);
936
937                 if (read == val)
938                         return 0;
939         }
940
941         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
942                 "write=%08x. Read=%08x\n",
943                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
944                 where, val, read);
945
946         return -EINVAL;
947 }
948
949 /*
950  * This routine prepares the Memory Controller for error injection.
951  * The error will be injected when some process tries to write to the
952  * memory that matches the given criteria.
953  * The criteria can be set in terms of a mask where dimm, rank, bank, page
954  * and col can be specified.
955  * A -1 value for any of the mask items will make the MCU to ignore
956  * that matching criteria for error injection.
957  *
958  * It should be noticed that the error will only happen after a write operation
959  * on a memory that matches the condition. if REPEAT_EN is not enabled at
960  * inject mask, then it will produce just one error. Otherwise, it will repeat
961  * until the injectmask would be cleaned.
962  *
963  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
964  *    is reliable enough to check if the MC is using the
965  *    three channels. However, this is not clear at the datasheet.
966  */
967 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
968                                        const char *data, size_t count)
969 {
970         struct i7core_pvt *pvt = mci->pvt_info;
971         u32 injectmask;
972         u64 mask = 0;
973         int  rc;
974         long enable;
975
976         if (!pvt->pci_ch[pvt->inject.channel][0])
977                 return 0;
978
979         rc = strict_strtoul(data, 10, &enable);
980         if ((rc < 0))
981                 return 0;
982
983         if (enable) {
984                 pvt->inject.enable = 1;
985         } else {
986                 disable_inject(mci);
987                 return count;
988         }
989
990         /* Sets pvt->inject.dimm mask */
991         if (pvt->inject.dimm < 0)
992                 mask |= 1LL << 41;
993         else {
994                 if (pvt->channel[pvt->inject.channel].dimms > 2)
995                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
996                 else
997                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
998         }
999
1000         /* Sets pvt->inject.rank mask */
1001         if (pvt->inject.rank < 0)
1002                 mask |= 1LL << 40;
1003         else {
1004                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1005                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1006                 else
1007                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1008         }
1009
1010         /* Sets pvt->inject.bank mask */
1011         if (pvt->inject.bank < 0)
1012                 mask |= 1LL << 39;
1013         else
1014                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1015
1016         /* Sets pvt->inject.page mask */
1017         if (pvt->inject.page < 0)
1018                 mask |= 1LL << 38;
1019         else
1020                 mask |= (pvt->inject.page & 0xffff) << 14;
1021
1022         /* Sets pvt->inject.column mask */
1023         if (pvt->inject.col < 0)
1024                 mask |= 1LL << 37;
1025         else
1026                 mask |= (pvt->inject.col & 0x3fff);
1027
1028         /*
1029          * bit    0: REPEAT_EN
1030          * bits 1-2: MASK_HALF_CACHELINE
1031          * bit    3: INJECT_ECC
1032          * bit    4: INJECT_ADDR_PARITY
1033          */
1034
1035         injectmask = (pvt->inject.type & 1) |
1036                      (pvt->inject.section & 0x3) << 1 |
1037                      (pvt->inject.type & 0x6) << (3 - 1);
1038
1039         /* Unlock writes to registers - this register is write only */
1040         pci_write_config_dword(pvt->pci_noncore,
1041                                MC_CFG_CONTROL, 0x2);
1042
1043         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1044                                MC_CHANNEL_ADDR_MATCH, mask);
1045         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1046                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1047
1048         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1049                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1050
1051         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1052                                MC_CHANNEL_ERROR_INJECT, injectmask);
1053
1054         /*
1055          * This is something undocumented, based on my tests
1056          * Without writing 8 to this register, errors aren't injected. Not sure
1057          * why.
1058          */
1059         pci_write_config_dword(pvt->pci_noncore,
1060                                MC_CFG_CONTROL, 8);
1061
1062         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1063                 " inject 0x%08x\n",
1064                 mask, pvt->inject.eccmask, injectmask);
1065
1066
1067         return count;
1068 }
1069
1070 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1071                                         char *data)
1072 {
1073         struct i7core_pvt *pvt = mci->pvt_info;
1074         u32 injectmask;
1075
1076         if (!pvt->pci_ch[pvt->inject.channel][0])
1077                 return 0;
1078
1079         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1080                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1081
1082         debugf0("Inject error read: 0x%018x\n", injectmask);
1083
1084         if (injectmask & 0x0c)
1085                 pvt->inject.enable = 1;
1086
1087         return sprintf(data, "%d\n", pvt->inject.enable);
1088 }
1089
/*
 * DECLARE_COUNTER(param) generates the sysfs show handler for
 * pvt->udimm_ce_count[param]. "data unavailable" is printed instead while
 * no count is available yet or when pvt->is_registered is set.
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() \n", __func__);                           \
                                                                \
        if (pvt->is_registered || !pvt->ce_count_available)     \
                return sprintf(data, "data unavailable\n");     \
                                                                \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}
1103
1104 #define ATTR_COUNTER(param)                                     \
1105         {                                                       \
1106                 .attr = {                                       \
1107                         .name = __stringify(udimm##param),      \
1108                         .mode = (S_IRUGO | S_IWUSR)             \
1109                 },                                              \
1110                 .show  = i7core_show_counter_##param            \
1111         }
1112
1113 DECLARE_COUNTER(0);
1114 DECLARE_COUNTER(1);
1115 DECLARE_COUNTER(2);
1116
1117 /*
1118  * Sysfs struct
1119  */
1120
1121 static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1122         ATTR_ADDR_MATCH(channel),
1123         ATTR_ADDR_MATCH(dimm),
1124         ATTR_ADDR_MATCH(rank),
1125         ATTR_ADDR_MATCH(bank),
1126         ATTR_ADDR_MATCH(page),
1127         ATTR_ADDR_MATCH(col),
1128         { } /* End of list */
1129 };
1130
1131 static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
1132         .name  = "inject_addrmatch",
1133         .mcidev_attr = i7core_addrmatch_attrs,
1134 };
1135
1136 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1137         ATTR_COUNTER(0),
1138         ATTR_COUNTER(1),
1139         ATTR_COUNTER(2),
1140         { .attr = { .name = NULL } }
1141 };
1142
1143 static const struct mcidev_sysfs_group i7core_udimm_counters = {
1144         .name  = "all_channel_counts",
1145         .mcidev_attr = i7core_udimm_counters_attrs,
1146 };
1147
1148 static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
1149         {
1150                 .attr = {
1151                         .name = "inject_section",
1152                         .mode = (S_IRUGO | S_IWUSR)
1153                 },
1154                 .show  = i7core_inject_section_show,
1155                 .store = i7core_inject_section_store,
1156         }, {
1157                 .attr = {
1158                         .name = "inject_type",
1159                         .mode = (S_IRUGO | S_IWUSR)
1160                 },
1161                 .show  = i7core_inject_type_show,
1162                 .store = i7core_inject_type_store,
1163         }, {
1164                 .attr = {
1165                         .name = "inject_eccmask",
1166                         .mode = (S_IRUGO | S_IWUSR)
1167                 },
1168                 .show  = i7core_inject_eccmask_show,
1169                 .store = i7core_inject_eccmask_store,
1170         }, {
1171                 .grp = &i7core_inject_addrmatch,
1172         }, {
1173                 .attr = {
1174                         .name = "inject_enable",
1175                         .mode = (S_IRUGO | S_IWUSR)
1176                 },
1177                 .show  = i7core_inject_enable_show,
1178                 .store = i7core_inject_enable_store,
1179         },
1180         { }     /* End of list */
1181 };
1182
1183 static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
1184         {
1185                 .attr = {
1186                         .name = "inject_section",
1187                         .mode = (S_IRUGO | S_IWUSR)
1188                 },
1189                 .show  = i7core_inject_section_show,
1190                 .store = i7core_inject_section_store,
1191         }, {
1192                 .attr = {
1193                         .name = "inject_type",
1194                         .mode = (S_IRUGO | S_IWUSR)
1195                 },
1196                 .show  = i7core_inject_type_show,
1197                 .store = i7core_inject_type_store,
1198         }, {
1199                 .attr = {
1200                         .name = "inject_eccmask",
1201                         .mode = (S_IRUGO | S_IWUSR)
1202                 },
1203                 .show  = i7core_inject_eccmask_show,
1204                 .store = i7core_inject_eccmask_store,
1205         }, {
1206                 .grp = &i7core_inject_addrmatch,
1207         }, {
1208                 .attr = {
1209                         .name = "inject_enable",
1210                         .mode = (S_IRUGO | S_IWUSR)
1211                 },
1212                 .show  = i7core_inject_enable_show,
1213                 .store = i7core_inject_enable_store,
1214         }, {
1215                 .grp = &i7core_udimm_counters,
1216         },
1217         { }     /* End of list */
1218 };
1219
1220 /****************************************************************************
1221         Device initialization routines: put/get, init/exit
1222  ****************************************************************************/
1223
1224 /*
1225  *      i7core_put_devices      'put' all the devices that we have
1226  *                              reserved via 'get'
1227  */
1228 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1229 {
1230         int i;
1231
1232         debugf0(__FILE__ ": %s()\n", __func__);
1233         for (i = 0; i < i7core_dev->n_devs; i++) {
1234                 struct pci_dev *pdev = i7core_dev->pdev[i];
1235                 if (!pdev)
1236                         continue;
1237                 debugf0("Removing dev %02x:%02x.%d\n",
1238                         pdev->bus->number,
1239                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1240                 pci_dev_put(pdev);
1241         }
1242         kfree(i7core_dev->pdev);
1243 }
1244
1245 static void i7core_put_all_devices(void)
1246 {
1247         struct i7core_dev *i7core_dev, *tmp;
1248
1249         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1250                 i7core_put_devices(i7core_dev);
1251                 list_del(&i7core_dev->list);
1252                 kfree(i7core_dev);
1253         }
1254 }
1255
1256 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1257 {
1258         struct pci_dev *pdev = NULL;
1259         int i;
1260         /*
1261          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1262          * aren't announced by acpi. So, we need to use a legacy scan probing
1263          * to detect them
1264          */
1265         while (table && table->descr) {
1266                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1267                 if (unlikely(!pdev)) {
1268                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1269                                 pcibios_scan_specific_bus(255-i);
1270                 }
1271                 pci_dev_put(pdev);
1272                 table++;
1273         }
1274 }
1275
1276 static unsigned i7core_pci_lastbus(void)
1277 {
1278         int last_bus = 0, bus;
1279         struct pci_bus *b = NULL;
1280
1281         while ((b = pci_find_next_bus(b)) != NULL) {
1282                 bus = b->number;
1283                 debugf0("Found bus %d\n", bus);
1284                 if (bus > last_bus)
1285                         last_bus = bus;
1286         }
1287
1288         debugf0("Last bus %d\n", last_bus);
1289
1290         return last_bus;
1291 }
1292
1293 /*
1294  *      i7core_get_devices      Find and perform 'get' operation on the MCH's
1295  *                      device/functions we want to reference for this driver
1296  *
1297  *                      Need to 'get' device 16 func 1 and func 2
1298  */
1299 int i7core_get_onedevice(struct pci_dev **prev, const int devno,
1300                          const struct pci_id_descr *dev_descr,
1301                          const unsigned n_devs,
1302                          const unsigned last_bus)
1303 {
1304         struct i7core_dev *i7core_dev;
1305
1306         struct pci_dev *pdev = NULL;
1307         u8 bus = 0;
1308         u8 socket = 0;
1309
1310         pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1311                               dev_descr->dev_id, *prev);
1312
1313         /*
1314          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1315          * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1316          * to probe for the alternate address in case of failure
1317          */
1318         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1319                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1320                                       PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1321
1322         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1323                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1324                                       PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1325                                       *prev);
1326
1327         if (!pdev) {
1328                 if (*prev) {
1329                         *prev = pdev;
1330                         return 0;
1331                 }
1332
1333                 if (dev_descr->optional)
1334                         return 0;
1335
1336                 if (devno == 0)
1337                         return -ENODEV;
1338
1339                 i7core_printk(KERN_INFO,
1340                         "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1341                         dev_descr->dev, dev_descr->func,
1342                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1343
1344                 /* End of list, leave */
1345                 return -ENODEV;
1346         }
1347         bus = pdev->bus->number;
1348
1349         socket = last_bus - bus;
1350
1351         i7core_dev = get_i7core_dev(socket);
1352         if (!i7core_dev) {
1353                 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1354                 if (!i7core_dev)
1355                         return -ENOMEM;
1356                 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
1357                                            GFP_KERNEL);
1358                 if (!i7core_dev->pdev) {
1359                         kfree(i7core_dev);
1360                         return -ENOMEM;
1361                 }
1362                 i7core_dev->socket = socket;
1363                 i7core_dev->n_devs = n_devs;
1364                 list_add_tail(&i7core_dev->list, &i7core_edac_list);
1365         }
1366
1367         if (i7core_dev->pdev[devno]) {
1368                 i7core_printk(KERN_ERR,
1369                         "Duplicated device for "
1370                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1371                         bus, dev_descr->dev, dev_descr->func,
1372                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1373                 pci_dev_put(pdev);
1374                 return -ENODEV;
1375         }
1376
1377         i7core_dev->pdev[devno] = pdev;
1378
1379         /* Sanity check */
1380         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1381                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1382                 i7core_printk(KERN_ERR,
1383                         "Device PCI ID %04x:%04x "
1384                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1385                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1386                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1387                         bus, dev_descr->dev, dev_descr->func);
1388                 return -ENODEV;
1389         }
1390
1391         /* Be sure that the device is enabled */
1392         if (unlikely(pci_enable_device(pdev) < 0)) {
1393                 i7core_printk(KERN_ERR,
1394                         "Couldn't enable "
1395                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1396                         bus, dev_descr->dev, dev_descr->func,
1397                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1398                 return -ENODEV;
1399         }
1400
1401         debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1402                 socket, bus, dev_descr->dev,
1403                 dev_descr->func,
1404                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1405
1406         *prev = pdev;
1407
1408         return 0;
1409 }
1410
1411 static int i7core_get_devices(const struct pci_id_table *table)
1412 {
1413         int i, rc, last_bus;
1414         struct pci_dev *pdev = NULL;
1415         const struct pci_id_descr *dev_descr;
1416
1417         last_bus = i7core_pci_lastbus();
1418
1419         while (table && table->descr) {
1420                 dev_descr = table->descr;
1421                 for (i = 0; i < table->n_devs; i++) {
1422                         pdev = NULL;
1423                         do {
1424                                 rc = i7core_get_onedevice(&pdev, i,
1425                                                           &dev_descr[i],
1426                                                           table->n_devs,
1427                                                           last_bus);
1428                                 if (rc < 0) {
1429                                         if (i == 0) {
1430                                                 i = table->n_devs;
1431                                                 break;
1432                                         }
1433                                         i7core_put_all_devices();
1434                                         return -ENODEV;
1435                                 }
1436                         } while (pdev);
1437                 }
1438                 table++;
1439         }
1440
1441         return 0;
1442 }
1443
1444 static int mci_bind_devs(struct mem_ctl_info *mci,
1445                          struct i7core_dev *i7core_dev)
1446 {
1447         struct i7core_pvt *pvt = mci->pvt_info;
1448         struct pci_dev *pdev;
1449         int i, func, slot;
1450
1451         /* Associates i7core_dev and mci for future usage */
1452         pvt->i7core_dev = i7core_dev;
1453         i7core_dev->mci = mci;
1454
1455         pvt->is_registered = 0;
1456         for (i = 0; i < i7core_dev->n_devs; i++) {
1457                 pdev = i7core_dev->pdev[i];
1458                 if (!pdev)
1459                         continue;
1460
1461                 func = PCI_FUNC(pdev->devfn);
1462                 slot = PCI_SLOT(pdev->devfn);
1463                 if (slot == 3) {
1464                         if (unlikely(func > MAX_MCR_FUNC))
1465                                 goto error;
1466                         pvt->pci_mcr[func] = pdev;
1467                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1468                         if (unlikely(func > MAX_CHAN_FUNC))
1469                                 goto error;
1470                         pvt->pci_ch[slot - 4][func] = pdev;
1471                 } else if (!slot && !func)
1472                         pvt->pci_noncore = pdev;
1473                 else
1474                         goto error;
1475
1476                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1477                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1478                         pdev, i7core_dev->socket);
1479
1480                 if (PCI_SLOT(pdev->devfn) == 3 &&
1481                         PCI_FUNC(pdev->devfn) == 2)
1482                         pvt->is_registered = 1;
1483         }
1484
1485         return 0;
1486
1487 error:
1488         i7core_printk(KERN_ERR, "Device %d, function %d "
1489                       "is out of the expected range\n",
1490                       slot, func);
1491         return -EINVAL;
1492 }
1493
1494 /****************************************************************************
1495                         Error check routines
1496  ****************************************************************************/
1497 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1498                                       const int chan,
1499                                       const int dimm,
1500                                       const int add)
1501 {
1502         char *msg;
1503         struct i7core_pvt *pvt = mci->pvt_info;
1504         int row = pvt->csrow_map[chan][dimm], i;
1505
1506         for (i = 0; i < add; i++) {
1507                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1508                                 "(Socket=%d channel=%d dimm=%d)",
1509                                 pvt->i7core_dev->socket, chan, dimm);
1510
1511                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1512                 kfree (msg);
1513         }
1514 }
1515
1516 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1517                                          const int chan,
1518                                          const int new0,
1519                                          const int new1,
1520                                          const int new2)
1521 {
1522         struct i7core_pvt *pvt = mci->pvt_info;
1523         int add0 = 0, add1 = 0, add2 = 0;
1524         /* Updates CE counters if it is not the first time here */
1525         if (pvt->ce_count_available) {
1526                 /* Updates CE counters */
1527
1528                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1529                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1530                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1531
1532                 if (add2 < 0)
1533                         add2 += 0x7fff;
1534                 pvt->rdimm_ce_count[chan][2] += add2;
1535
1536                 if (add1 < 0)
1537                         add1 += 0x7fff;
1538                 pvt->rdimm_ce_count[chan][1] += add1;
1539
1540                 if (add0 < 0)
1541                         add0 += 0x7fff;
1542                 pvt->rdimm_ce_count[chan][0] += add0;
1543         } else
1544                 pvt->ce_count_available = 1;
1545
1546         /* Store the new values */
1547         pvt->rdimm_last_ce_count[chan][2] = new2;
1548         pvt->rdimm_last_ce_count[chan][1] = new1;
1549         pvt->rdimm_last_ce_count[chan][0] = new0;
1550
1551         /*updated the edac core */
1552         if (add0 != 0)
1553                 i7core_rdimm_update_csrow(mci, chan, 0, add0);
1554         if (add1 != 0)
1555                 i7core_rdimm_update_csrow(mci, chan, 1, add1);
1556         if (add2 != 0)
1557                 i7core_rdimm_update_csrow(mci, chan, 2, add2);
1558
1559 }
1560
1561 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1562 {
1563         struct i7core_pvt *pvt = mci->pvt_info;
1564         u32 rcv[3][2];
1565         int i, new0, new1, new2;
1566
1567         /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1568         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1569                                                                 &rcv[0][0]);
1570         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1571                                                                 &rcv[0][1]);
1572         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1573                                                                 &rcv[1][0]);
1574         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1575                                                                 &rcv[1][1]);
1576         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1577                                                                 &rcv[2][0]);
1578         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1579                                                                 &rcv[2][1]);
1580         for (i = 0 ; i < 3; i++) {
1581                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1582                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1583                 /*if the channel has 3 dimms*/
1584                 if (pvt->channel[i].dimms > 2) {
1585                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1586                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1587                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1588                 } else {
1589                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1590                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1591                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1592                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1593                         new2 = 0;
1594                 }
1595
1596                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1597         }
1598 }
1599
1600 /* This function is based on the device 3 function 4 registers as described on:
1601  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1602  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1603  * also available at:
1604  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1605  */
1606 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1607 {
1608         struct i7core_pvt *pvt = mci->pvt_info;
1609         u32 rcv1, rcv0;
1610         int new0, new1, new2;
1611
1612         if (!pvt->pci_mcr[4]) {
1613                 debugf0("%s MCR registers not found\n", __func__);
1614                 return;
1615         }
1616
1617         /* Corrected test errors */
1618         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1619         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1620
1621         /* Store the new values */
1622         new2 = DIMM2_COR_ERR(rcv1);
1623         new1 = DIMM1_COR_ERR(rcv0);
1624         new0 = DIMM0_COR_ERR(rcv0);
1625
1626         /* Updates CE counters if it is not the first time here */
1627         if (pvt->ce_count_available) {
1628                 /* Updates CE counters */
1629                 int add0, add1, add2;
1630
1631                 add2 = new2 - pvt->udimm_last_ce_count[2];
1632                 add1 = new1 - pvt->udimm_last_ce_count[1];
1633                 add0 = new0 - pvt->udimm_last_ce_count[0];
1634
1635                 if (add2 < 0)
1636                         add2 += 0x7fff;
1637                 pvt->udimm_ce_count[2] += add2;
1638
1639                 if (add1 < 0)
1640                         add1 += 0x7fff;
1641                 pvt->udimm_ce_count[1] += add1;
1642
1643                 if (add0 < 0)
1644                         add0 += 0x7fff;
1645                 pvt->udimm_ce_count[0] += add0;
1646
1647                 if (add0 | add1 | add2)
1648                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1649                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1650                                       add0, add1, add2);
1651         } else
1652                 pvt->ce_count_available = 1;
1653
1654         /* Store the new values */
1655         pvt->udimm_last_ce_count[2] = new2;
1656         pvt->udimm_last_ce_count[1] = new1;
1657         pvt->udimm_last_ce_count[0] = new0;
1658 }
1659
1660 /*
1661  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1662  * Architectures Software Developer’s Manual Volume 3B.
1663  * Nehalem are defined as family 0x06, model 0x1a
1664  *
1665  * The MCA registers used here are the following ones:
1666  *     struct mce field MCA Register
1667  *     m->status        MSR_IA32_MC8_STATUS
1668  *     m->addr          MSR_IA32_MC8_ADDR
1669  *     m->misc          MSR_IA32_MC8_MISC
1670  * In the case of Nehalem, the error information is masked at .status and .misc
1671  * fields
1672  */
1673 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1674                                     const struct mce *m)
1675 {
1676         struct i7core_pvt *pvt = mci->pvt_info;
1677         char *type, *optype, *err, *msg;
1678         unsigned long error = m->status & 0x1ff0000l;
1679         u32 optypenum = (m->status >> 4) & 0x07;
1680         u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1681         u32 dimm = (m->misc >> 16) & 0x3;
1682         u32 channel = (m->misc >> 18) & 0x3;
1683         u32 syndrome = m->misc >> 32;
1684         u32 errnum = find_first_bit(&error, 32);
1685         int csrow;
1686
1687         if (m->mcgstatus & 1)
1688                 type = "FATAL";
1689         else
1690                 type = "NON_FATAL";
1691
1692         switch (optypenum) {
1693         case 0:
1694                 optype = "generic undef request";
1695                 break;
1696         case 1:
1697                 optype = "read error";
1698                 break;
1699         case 2:
1700                 optype = "write error";
1701                 break;
1702         case 3:
1703                 optype = "addr/cmd error";
1704                 break;
1705         case 4:
1706                 optype = "scrubbing error";
1707                 break;
1708         default:
1709                 optype = "reserved";
1710                 break;
1711         }
1712
1713         switch (errnum) {
1714         case 16:
1715                 err = "read ECC error";
1716                 break;
1717         case 17:
1718                 err = "RAS ECC error";
1719                 break;
1720         case 18:
1721                 err = "write parity error";
1722                 break;
1723         case 19:
1724                 err = "redundacy loss";
1725                 break;
1726         case 20:
1727                 err = "reserved";
1728                 break;
1729         case 21:
1730                 err = "memory range error";
1731                 break;
1732         case 22:
1733                 err = "RTID out of range";
1734                 break;
1735         case 23:
1736                 err = "address parity error";
1737                 break;
1738         case 24:
1739                 err = "byte enable parity error";
1740                 break;
1741         default:
1742                 err = "unknown";
1743         }
1744
1745         /* FIXME: should convert addr into bank and rank information */
1746         msg = kasprintf(GFP_ATOMIC,
1747                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1748                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1749                 type, (long long) m->addr, m->cpu, dimm, channel,
1750                 syndrome, core_err_cnt, (long long)m->status,
1751                 (long long)m->misc, optype, err);
1752
1753         debugf0("%s", msg);
1754
1755         csrow = pvt->csrow_map[channel][dimm];
1756
1757         /* Call the helper to output message */
1758         if (m->mcgstatus & 1)
1759                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1760                                 0 /* FIXME: should be channel here */, msg);
1761         else if (!pvt->is_registered)
1762                 edac_mc_handle_fbd_ce(mci, csrow,
1763                                 0 /* FIXME: should be channel here */, msg);
1764
1765         kfree(msg);
1766 }
1767
1768 /*
1769  *      i7core_check_error      Retrieve and process errors reported by the
1770  *                              hardware. Called by the Core module.
1771  */
1772 static void i7core_check_error(struct mem_ctl_info *mci)
1773 {
1774         struct i7core_pvt *pvt = mci->pvt_info;
1775         int i;
1776         unsigned count = 0;
1777         struct mce *m;
1778
1779         /*
1780          * MCE first step: Copy all mce errors into a temporary buffer
1781          * We use a double buffering here, to reduce the risk of
1782          * loosing an error.
1783          */
1784         smp_rmb();
1785         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1786                 % MCE_LOG_LEN;
1787         if (!count)
1788                 goto check_ce_error;
1789
1790         m = pvt->mce_outentry;
1791         if (pvt->mce_in + count > MCE_LOG_LEN) {
1792                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1793
1794                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1795                 smp_wmb();
1796                 pvt->mce_in = 0;
1797                 count -= l;
1798                 m += l;
1799         }
1800         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1801         smp_wmb();
1802         pvt->mce_in += count;
1803
1804         smp_rmb();
1805         if (pvt->mce_overrun) {
1806                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1807                               pvt->mce_overrun);
1808                 smp_wmb();
1809                 pvt->mce_overrun = 0;
1810         }
1811
1812         /*
1813          * MCE second step: parse errors and display
1814          */
1815         for (i = 0; i < count; i++)
1816                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1817
1818         /*
1819          * Now, let's increment CE error counts
1820          */
1821 check_ce_error:
1822         if (!pvt->is_registered)
1823                 i7core_udimm_check_mc_ecc_err(mci);
1824         else
1825                 i7core_rdimm_check_mc_ecc_err(mci);
1826 }
1827
1828 /*
1829  * i7core_mce_check_error       Replicates mcelog routine to get errors
1830  *                              This routine simply queues mcelog errors, and
1831  *                              return. The error itself should be handled later
1832  *                              by i7core_check_error.
1833  * WARNING: As this routine should be called at NMI time, extra care should
1834  * be taken to avoid deadlocks, and to be as fast as possible.
1835  */
1836 static int i7core_mce_check_error(void *priv, struct mce *mce)
1837 {
1838         struct mem_ctl_info *mci = priv;
1839         struct i7core_pvt *pvt = mci->pvt_info;
1840
1841         /*
1842          * Just let mcelog handle it if the error is
1843          * outside the memory controller
1844          */
1845         if (((mce->status & 0xffff) >> 7) != 1)
1846                 return 0;
1847
1848         /* Bank 8 registers are the only ones that we know how to handle */
1849         if (mce->bank != 8)
1850                 return 0;
1851
1852 #ifdef CONFIG_SMP
1853         /* Only handle if it is the right mc controller */
1854         if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1855                 return 0;
1856 #endif
1857
1858         smp_rmb();
1859         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1860                 smp_wmb();
1861                 pvt->mce_overrun++;
1862                 return 0;
1863         }
1864
1865         /* Copy memory error at the ringbuffer */
1866         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1867         smp_wmb();
1868         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1869
1870         /* Handle fatal errors immediately */
1871         if (mce->mcgstatus & 1)
1872                 i7core_check_error(mci);
1873
1874         /* Advice mcelog that the error were handled */
1875         return 1;
1876 }
1877
1878 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1879                                const int num_channels, const int num_csrows)
1880 {
1881         struct mem_ctl_info *mci;
1882         struct i7core_pvt *pvt;
1883         int csrow = 0;
1884         int rc;
1885
1886         /* allocate a new MC control structure */
1887         mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1888                             i7core_dev->socket);
1889         if (unlikely(!mci))
1890                 return -ENOMEM;
1891
1892         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1893                 __func__, mci, &i7core_dev->pdev[0]->dev);
1894
1895         /* record ptr to the generic device */
1896         mci->dev = &i7core_dev->pdev[0]->dev;
1897
1898         pvt = mci->pvt_info;
1899         memset(pvt, 0, sizeof(*pvt));
1900
1901         /*
1902          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1903          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1904          * memory channels
1905          */
1906         mci->mtype_cap = MEM_FLAG_DDR3;
1907         mci->edac_ctl_cap = EDAC_FLAG_NONE;
1908         mci->edac_cap = EDAC_FLAG_NONE;
1909         mci->mod_name = "i7core_edac.c";
1910         mci->mod_ver = I7CORE_REVISION;
1911         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1912                                   i7core_dev->socket);
1913         mci->dev_name = pci_name(i7core_dev->pdev[0]);
1914         mci->ctl_page_to_phys = NULL;
1915
1916         if (pvt->is_registered)
1917                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
1918         else
1919                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
1920
1921         /* Set the function pointer to an actual operation function */
1922         mci->edac_check = i7core_check_error;
1923
1924         /* Store pci devices at mci for faster access */
1925         rc = mci_bind_devs(mci, i7core_dev);
1926         if (unlikely(rc < 0))
1927                 goto fail;
1928
1929         /* Get dimm basic config */
1930         get_dimm_config(mci, &csrow);
1931
1932         /* add this new MC control structure to EDAC's list of MCs */
1933         if (unlikely(edac_mc_add_mc(mci))) {
1934                 debugf0("MC: " __FILE__
1935                         ": %s(): failed edac_mc_add_mc()\n", __func__);
1936                 /* FIXME: perhaps some code should go here that disables error
1937                  * reporting if we just enabled it
1938                  */
1939
1940                 rc = -EINVAL;
1941                 goto fail;
1942         }
1943
1944         /* Default error mask is any memory */
1945         pvt->inject.channel = 0;
1946         pvt->inject.dimm = -1;
1947         pvt->inject.rank = -1;
1948         pvt->inject.bank = -1;
1949         pvt->inject.page = -1;
1950         pvt->inject.col = -1;
1951
1952         /* Registers on edac_mce in order to receive memory errors */
1953         pvt->edac_mce.priv = mci;
1954         pvt->edac_mce.check_error = i7core_mce_check_error;
1955
1956         /* allocating generic PCI control info */
1957         pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1958                                                  EDAC_MOD_STR);
1959         if (unlikely(!pvt->i7core_pci)) {
1960                 printk(KERN_WARNING
1961                         "%s(): Unable to create PCI control\n",
1962                         __func__);
1963                 printk(KERN_WARNING
1964                         "%s(): PCI error report via EDAC not setup\n",
1965                         __func__);
1966         }
1967
1968         rc = edac_mce_register(&pvt->edac_mce);
1969         if (unlikely(rc < 0)) {
1970                 debugf0("MC: " __FILE__
1971                         ": %s(): failed edac_mce_register()\n", __func__);
1972         }
1973
1974 fail:
1975         if (rc < 0)
1976                 edac_mc_free(mci);
1977         return rc;
1978 }
1979
1980 /*
1981  *      i7core_probe    Probe for ONE instance of device to see if it is
1982  *                      present.
1983  *      return:
1984  *              0 for FOUND a device
1985  *              < 0 for error code
1986  */
1987
1988 static int __devinit i7core_probe(struct pci_dev *pdev,
1989                                   const struct pci_device_id *id)
1990 {
1991         int rc;
1992         struct i7core_dev *i7core_dev;
1993
1994         /* get the pci devices we want to reserve for our use */
1995         mutex_lock(&i7core_edac_lock);
1996
1997         /*
1998          * All memory controllers are allocated at the first pass.
1999          */
2000         if (unlikely(probed >= 1)) {
2001                 mutex_unlock(&i7core_edac_lock);
2002                 return -EINVAL;
2003         }
2004         probed++;
2005
2006         rc = i7core_get_devices(pci_dev_table);
2007         if (unlikely(rc < 0))
2008                 goto fail0;
2009
2010         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2011                 int channels;
2012                 int csrows;
2013
2014                 /* Check the number of active and not disabled channels */
2015                 rc = i7core_get_active_channels(i7core_dev->socket,
2016                                                 &channels, &csrows);
2017                 if (unlikely(rc < 0))
2018                         goto fail1;
2019
2020                 rc = i7core_register_mci(i7core_dev, channels, csrows);
2021                 if (unlikely(rc < 0))
2022                         goto fail1;
2023         }
2024
2025         i7core_printk(KERN_INFO, "Driver loaded.\n");
2026
2027         mutex_unlock(&i7core_edac_lock);
2028         return 0;
2029
2030 fail1:
2031         i7core_put_all_devices();
2032 fail0:
2033         mutex_unlock(&i7core_edac_lock);
2034         return rc;
2035 }
2036
2037 /*
2038  *      i7core_remove   destructor for one instance of device
2039  *
2040  */
2041 static void __devexit i7core_remove(struct pci_dev *pdev)
2042 {
2043         struct mem_ctl_info *mci;
2044         struct i7core_dev *i7core_dev, *tmp;
2045         struct i7core_pvt *pvt;
2046
2047         debugf0(__FILE__ ": %s()\n", __func__);
2048
2049         /*
2050          * we have a trouble here: pdev value for removal will be wrong, since
2051          * it will point to the X58 register used to detect that the machine
2052          * is a Nehalem or upper design. However, due to the way several PCI
2053          * devices are grouped together to provide MC functionality, we need
2054          * to use a different method for releasing the devices
2055          */
2056
2057         mutex_lock(&i7core_edac_lock);
2058         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2059                 mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
2060                 if (unlikely(!mci || !mci->pvt_info)) {
2061                         debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2062                                 __func__, &i7core_dev->pdev[0]->dev);
2063
2064                                 i7core_printk(KERN_ERR,
2065                                       "Couldn't find mci hanler\n");
2066                 } else {
2067                         pvt = mci->pvt_info;
2068                         i7core_dev = pvt->i7core_dev;
2069
2070                         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2071                                 __func__, mci, &i7core_dev->pdev[0]->dev);
2072
2073                         /* Disable MCE NMI handler */
2074                         edac_mce_unregister(&pvt->edac_mce);
2075
2076                         /* Disable EDAC polling */
2077                         if (likely(pvt->i7core_pci))
2078                                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2079                         else
2080                                 i7core_printk(KERN_ERR,
2081                                               "Couldn't find mem_ctl_info for socket %d\n",
2082                                               i7core_dev->socket);
2083                         pvt->i7core_pci = NULL;
2084
2085                         /* Remove MC sysfs nodes */
2086                         edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2087
2088                         /* Free data */
2089                         kfree(mci->ctl_name);
2090                         edac_mc_free(mci);
2091
2092                         /* Release PCI resources */
2093                         i7core_put_devices(i7core_dev);
2094                 }
2095                 list_del(&i7core_dev->list);
2096                 kfree(i7core_dev);
2097         }
2098         probed--;
2099
2100         mutex_unlock(&i7core_edac_lock);
2101 }
2102
2103 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2104
2105 /*
2106  *      i7core_driver   pci_driver structure for this module
2107  *
2108  */
2109 static struct pci_driver i7core_driver = {
2110         .name     = "i7core_edac",
2111         .probe    = i7core_probe,
2112         .remove   = __devexit_p(i7core_remove),
2113         .id_table = i7core_pci_tbl,
2114 };
2115
2116 /*
2117  *      i7core_init             Module entry function
2118  *                      Try to initialize this module for its devices
2119  */
2120 static int __init i7core_init(void)
2121 {
2122         int pci_rc;
2123
2124         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2125
2126         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2127         opstate_init();
2128
2129         i7core_xeon_pci_fixup(pci_dev_table);
2130
2131         pci_rc = pci_register_driver(&i7core_driver);
2132
2133         if (pci_rc >= 0)
2134                 return 0;
2135
2136         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2137                       pci_rc);
2138
2139         return pci_rc;
2140 }
2141
2142 /*
2143  *      i7core_exit()   Module exit function
2144  *                      Unregister the driver
2145  */
2146 static void __exit i7core_exit(void)
2147 {
2148         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2149         pci_unregister_driver(&i7core_driver);
2150 }
2151
2152 module_init(i7core_init);
2153 module_exit(i7core_exit);
2154
2155 MODULE_LICENSE("GPL");
2156 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2157 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2158 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2159                    I7CORE_REVISION);
2160
2161 module_param(edac_op_state, int, 0444);
2162 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");