ibmveth: remove procfs code
linux-2.6.git: drivers/net/ibmveth.c
1 /**************************************************************************/
2 /*                                                                        */
3 /* IBM eServer i/pSeries Virtual Ethernet Device Driver                   */
4 /* Copyright (C) 2003 IBM Corp.                                           */
5 /*  Originally written by Dave Larson (larson1@us.ibm.com)                */
6 /*  Maintained by Santiago Leon (santil@us.ibm.com)                       */
7 /*                                                                        */
8 /*  This program is free software; you can redistribute it and/or modify  */
9 /*  it under the terms of the GNU General Public License as published by  */
10 /*  the Free Software Foundation; either version 2 of the License, or     */
11 /*  (at your option) any later version.                                   */
12 /*                                                                        */
13 /*  This program is distributed in the hope that it will be useful,       */
14 /*  but WITHOUT ANY WARRANTY; without even the implied warranty of        */
15 /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
16 /*  GNU General Public License for more details.                          */
17 /*                                                                        */
18 /*  You should have received a copy of the GNU General Public License     */
19 /*  along with this program; if not, write to the Free Software           */
20 /*  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  */
21 /*                                                                   USA  */
22 /*                                                                        */
23 /* This module contains the implementation of a virtual ethernet device   */
24 /* for use with IBM i/pSeries LPAR Linux.  It utilizes the logical LAN    */
25 /* option of the RS/6000 Platform Architecture to interface with virtual  */
26 /* ethernet NICs that are presented to the partition by the hypervisor.   */
27 /*                                                                        */
28 /**************************************************************************/
29
30 #include <linux/module.h>
31 #include <linux/moduleparam.h>
32 #include <linux/types.h>
33 #include <linux/errno.h>
34 #include <linux/ioport.h>
35 #include <linux/dma-mapping.h>
36 #include <linux/kernel.h>
37 #include <linux/netdevice.h>
38 #include <linux/etherdevice.h>
39 #include <linux/skbuff.h>
40 #include <linux/init.h>
41 #include <linux/delay.h>
42 #include <linux/mm.h>
43 #include <linux/pm.h>
44 #include <linux/ethtool.h>
45 #include <linux/in.h>
46 #include <linux/ip.h>
47 #include <linux/ipv6.h>
48 #include <linux/slab.h>
49 #include <asm/hvcall.h>
50 #include <asm/atomic.h>
51 #include <asm/vio.h>
52 #include <asm/iommu.h>
53 #include <asm/uaccess.h>
54 #include <asm/firmware.h>
55
56 #include "ibmveth.h"
57
58 #undef DEBUG
59
60 #define ibmveth_printk(fmt, args...) \
61   printk(KERN_DEBUG "%s: " fmt, __FILE__, ## args)
62
63 #define ibmveth_error_printk(fmt, args...) \
64   printk(KERN_ERR "(%s:%3.3d ua:%x) ERROR: " fmt, __FILE__, __LINE__ , adapter->vdev->unit_address, ## args)
65
66 #ifdef DEBUG
67 #define ibmveth_debug_printk_no_adapter(fmt, args...) \
68   printk(KERN_DEBUG "(%s:%3.3d): " fmt, __FILE__, __LINE__ , ## args)
69 #define ibmveth_debug_printk(fmt, args...) \
70   printk(KERN_DEBUG "(%s:%3.3d ua:%x): " fmt, __FILE__, __LINE__ , adapter->vdev->unit_address, ## args)
71 #define ibmveth_assert(expr) \
72   if(!(expr)) {                                   \
73     printk(KERN_DEBUG "assertion failed (%s:%3.3d ua:%x): %s\n", __FILE__, __LINE__, adapter->vdev->unit_address, #expr); \
74     BUG(); \
75   }
76 #else
77 #define ibmveth_debug_printk_no_adapter(fmt, args...)
78 #define ibmveth_debug_printk(fmt, args...)
79 #define ibmveth_assert(expr)
80 #endif
81
82 static int ibmveth_open(struct net_device *dev);
83 static int ibmveth_close(struct net_device *dev);
84 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
85 static int ibmveth_poll(struct napi_struct *napi, int budget);
86 static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *dev);
87 static void ibmveth_set_multicast_list(struct net_device *dev);
88 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu);
89 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
90 static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
91 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
92 static struct kobj_type ktype_veth_pool;
93
94
95 static const char ibmveth_driver_name[] = "ibmveth";
96 static const char ibmveth_driver_string[] = "IBM i/pSeries Virtual Ethernet Driver";
97 #define ibmveth_driver_version "1.03"
98
99 MODULE_AUTHOR("Santiago Leon <santil@us.ibm.com>");
100 MODULE_DESCRIPTION("IBM i/pSeries Virtual Ethernet Driver");
101 MODULE_LICENSE("GPL");
102 MODULE_VERSION(ibmveth_driver_version);
103
104 static unsigned int tx_copybreak __read_mostly = 128;
105 module_param(tx_copybreak, uint, 0644);
106 MODULE_PARM_DESC(tx_copybreak,
107         "Maximum size of packet that is copied to a new buffer on transmit");
108
109 static unsigned int rx_copybreak __read_mostly = 128;
110 module_param(rx_copybreak, uint, 0644);
111 MODULE_PARM_DESC(rx_copybreak,
112         "Maximum size of packet that is copied to a new buffer on receive");
113
114 static unsigned int rx_flush __read_mostly = 0;
115 module_param(rx_flush, uint, 0644);
116 MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
117
118 struct ibmveth_stat {
119         char name[ETH_GSTRING_LEN];
120         int offset;
121 };
122
123 #define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
124 #define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
125
126 struct ibmveth_stat ibmveth_stats[] = {
127         { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
128         { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
129         { "replenish_add_buff_failure", IBMVETH_STAT_OFF(replenish_add_buff_failure) },
130         { "replenish_add_buff_success", IBMVETH_STAT_OFF(replenish_add_buff_success) },
131         { "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
132         { "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
133         { "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
134         { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
135         { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
136         { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
137 };
138
139 /* simple methods of getting data from the current rxq entry */
140 static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
141 {
142         return adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off;
143 }
144
145 static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
146 {
147         return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >> IBMVETH_RXQ_TOGGLE_SHIFT;
148 }
149
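/* an rx queue entry is ready for the driver when its toggle bit matches the
 * driver's current toggle; the driver flips its toggle each time its queue
 * index wraps (see ibmveth_rxq_recycle_buffer/ibmveth_rxq_harvest_buffer) */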
150 static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
151 {
152         return (ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle);
153 }
154
155 static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
156 {
157         return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID);
158 }
159
160 static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
161 {
162         return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK);
163 }
164
165 static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
166 {
167         return (adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
168 }
169
170 static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
171 {
172         return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD);
173 }
174
175 /* setup the initial settings for a buffer pool */
176 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, u32 buff_size, u32 pool_active)
177 {
178         pool->size = pool_size;
179         pool->index = pool_index;
180         pool->buff_size = buff_size;
181         pool->threshold = pool_size * 7 / 8;
182         pool->active = pool_active;
183 }
184
185 /* allocate and set up a buffer pool - called during open */
186 static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
187 {
188         int i;
189
190         pool->free_map = kmalloc(sizeof(u16) * pool->size, GFP_KERNEL);
191
192         if(!pool->free_map) {
193                 return -1;
194         }
195
196         pool->dma_addr = kmalloc(sizeof(dma_addr_t) * pool->size, GFP_KERNEL);
197         if(!pool->dma_addr) {
198                 kfree(pool->free_map);
199                 pool->free_map = NULL;
200                 return -1;
201         }
202
203         pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
204
205         if(!pool->skbuff) {
206                 kfree(pool->dma_addr);
207                 pool->dma_addr = NULL;
208
209                 kfree(pool->free_map);
210                 pool->free_map = NULL;
211                 return -1;
212         }
213
214         memset(pool->dma_addr, 0, sizeof(dma_addr_t) * pool->size);
215
216         for(i = 0; i < pool->size; ++i) {
217                 pool->free_map[i] = i;
218         }
219
220         atomic_set(&pool->available, 0);
221         pool->producer_index = 0;
222         pool->consumer_index = 0;
223
224         return 0;
225 }
226
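/* write the buffer back out of the processor data cache, one cache line at a
 * time, so the data is visible outside the cache (used when the rx_flush
 * module parameter is set) */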
227 static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
228 {
229         unsigned long offset;
230
231         for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
232                 asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
233 }
234
235 /* replenish the buffers for a pool.  note that we don't need to
236  * skb_reserve these since they are used for incoming...
237  */
238 static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struct ibmveth_buff_pool *pool)
239 {
240         u32 i;
241         u32 count = pool->size - atomic_read(&pool->available);
242         u32 buffers_added = 0;
243         struct sk_buff *skb;
244         unsigned int free_index, index;
245         u64 correlator;
246         unsigned long lpar_rc;
247         dma_addr_t dma_addr;
248
249         mb();
250
251         for(i = 0; i < count; ++i) {
252                 union ibmveth_buf_desc desc;
253
254                 skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
255
256                 if(!skb) {
257                         ibmveth_debug_printk("replenish: unable to allocate skb\n");
258                         adapter->replenish_no_mem++;
259                         break;
260                 }
261
262                 free_index = pool->consumer_index;
263                 pool->consumer_index++;
264                 if (pool->consumer_index >= pool->size)
265                         pool->consumer_index = 0;
266                 index = pool->free_map[free_index];
267
268                 ibmveth_assert(index != IBM_VETH_INVALID_MAP);
269                 ibmveth_assert(pool->skbuff[index] == NULL);
270
271                 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
272                                 pool->buff_size, DMA_FROM_DEVICE);
273
274                 if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
275                         goto failure;
276
277                 pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
278                 pool->dma_addr[index] = dma_addr;
279                 pool->skbuff[index] = skb;
280
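                /* the correlator stored at the start of the buffer encodes the
                 * pool index (upper 32 bits) and buffer index (lower 32 bits)
                 * so the skb can be located when the buffer comes back */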
281                 correlator = ((u64)pool->index << 32) | index;
282                 *(u64*)skb->data = correlator;
283
284                 desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
285                 desc.fields.address = dma_addr;
286
287                 if (rx_flush) {
288                         unsigned int len = min(pool->buff_size,
289                                                 adapter->netdev->mtu +
290                                                 IBMVETH_BUFF_OH);
291                         ibmveth_flush_buffer(skb->data, len);
292                 }
293                 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
294
295                 if (lpar_rc != H_SUCCESS)
296                         goto failure;
297                 else {
298                         buffers_added++;
299                         adapter->replenish_add_buff_success++;
300                 }
301         }
302
303         mb();
304         atomic_add(buffers_added, &(pool->available));
305         return;
306
307 failure:
308         pool->free_map[free_index] = index;
309         pool->skbuff[index] = NULL;
310         if (pool->consumer_index == 0)
311                 pool->consumer_index = pool->size - 1;
312         else
313                 pool->consumer_index--;
314         if (!dma_mapping_error(&adapter->vdev->dev, dma_addr))
315                 dma_unmap_single(&adapter->vdev->dev,
316                                  pool->dma_addr[index], pool->buff_size,
317                                  DMA_FROM_DEVICE);
318         dev_kfree_skb_any(skb);
319         adapter->replenish_add_buff_failure++;
320
321         mb();
322         atomic_add(buffers_added, &(pool->available));
323 }
324
325 /* replenish routine */
326 static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
327 {
328         int i;
329
330         adapter->replenish_task_cycles++;
331
332         for (i = (IbmVethNumBufferPools - 1); i >= 0; i--) {
333                 struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];
334
335                 if (pool->active &&
336                     (atomic_read(&pool->available) < pool->threshold))
337                         ibmveth_replenish_buffer_pool(adapter, pool);
338         }
339
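        /* the last 8 bytes of the buffer list page hold the firmware-maintained
         * count of packets dropped because no receive buffer was available */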
340         adapter->rx_no_buffer = *(u64*)(((char*)adapter->buffer_list_addr) + 4096 - 8);
341 }
342
343 /* empty and free a buffer pool - also used to do cleanup in error paths */
344 static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, struct ibmveth_buff_pool *pool)
345 {
346         int i;
347
348         kfree(pool->free_map);
349         pool->free_map = NULL;
350
351         if(pool->skbuff && pool->dma_addr) {
352                 for(i = 0; i < pool->size; ++i) {
353                         struct sk_buff *skb = pool->skbuff[i];
354                         if(skb) {
355                                 dma_unmap_single(&adapter->vdev->dev,
356                                                  pool->dma_addr[i],
357                                                  pool->buff_size,
358                                                  DMA_FROM_DEVICE);
359                                 dev_kfree_skb_any(skb);
360                                 pool->skbuff[i] = NULL;
361                         }
362                 }
363         }
364
365         if(pool->dma_addr) {
366                 kfree(pool->dma_addr);
367                 pool->dma_addr = NULL;
368         }
369
370         if(pool->skbuff) {
371                 kfree(pool->skbuff);
372                 pool->skbuff = NULL;
373         }
374 }
375
376 /* remove a buffer from a pool */
377 static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, u64 correlator)
378 {
379         unsigned int pool  = correlator >> 32;
380         unsigned int index = correlator & 0xffffffffUL;
381         unsigned int free_index;
382         struct sk_buff *skb;
383
384         ibmveth_assert(pool < IbmVethNumBufferPools);
385         ibmveth_assert(index < adapter->rx_buff_pool[pool].size);
386
387         skb = adapter->rx_buff_pool[pool].skbuff[index];
388
389         ibmveth_assert(skb != NULL);
390
391         adapter->rx_buff_pool[pool].skbuff[index] = NULL;
392
393         dma_unmap_single(&adapter->vdev->dev,
394                          adapter->rx_buff_pool[pool].dma_addr[index],
395                          adapter->rx_buff_pool[pool].buff_size,
396                          DMA_FROM_DEVICE);
397
398         free_index = adapter->rx_buff_pool[pool].producer_index;
399         adapter->rx_buff_pool[pool].producer_index++;
400         if (adapter->rx_buff_pool[pool].producer_index >=
401             adapter->rx_buff_pool[pool].size)
402                 adapter->rx_buff_pool[pool].producer_index = 0;
403         adapter->rx_buff_pool[pool].free_map[free_index] = index;
404
405         mb();
406
407         atomic_dec(&(adapter->rx_buff_pool[pool].available));
408 }
409
410 /* get the current buffer on the rx queue */
411 static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
412 {
413         u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
414         unsigned int pool = correlator >> 32;
415         unsigned int index = correlator & 0xffffffffUL;
416
417         ibmveth_assert(pool < IbmVethNumBufferPools);
418         ibmveth_assert(index < adapter->rx_buff_pool[pool].size);
419
420         return adapter->rx_buff_pool[pool].skbuff[index];
421 }
422
423 /* recycle the current buffer on the rx queue */
424 static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
425 {
426         u32 q_index = adapter->rx_queue.index;
427         u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
428         unsigned int pool = correlator >> 32;
429         unsigned int index = correlator & 0xffffffffUL;
430         union ibmveth_buf_desc desc;
431         unsigned long lpar_rc;
432
433         ibmveth_assert(pool < IbmVethNumBufferPools);
434         ibmveth_assert(index < adapter->rx_buff_pool[pool].size);
435
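        /* the pool has been made inactive: take this buffer off the receive
         * queue and free the whole pool instead of re-adding the buffer */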
436         if(!adapter->rx_buff_pool[pool].active) {
437                 ibmveth_rxq_harvest_buffer(adapter);
438                 ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
439                 return;
440         }
441
442         desc.fields.flags_len = IBMVETH_BUF_VALID |
443                 adapter->rx_buff_pool[pool].buff_size;
444         desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index];
445
446         lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
447
448         if(lpar_rc != H_SUCCESS) {
449                 ibmveth_debug_printk("h_add_logical_lan_buffer failed during recycle rc=%ld\n", lpar_rc);
450                 ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
451         }
452
453         if(++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
454                 adapter->rx_queue.index = 0;
455                 adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
456         }
457 }
458
459 static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
460 {
461         ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
462
463         if(++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
464                 adapter->rx_queue.index = 0;
465                 adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
466         }
467 }
468
469 static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
470 {
471         int i;
472         struct device *dev = &adapter->vdev->dev;
473
474         if(adapter->buffer_list_addr != NULL) {
475                 if (!dma_mapping_error(dev, adapter->buffer_list_dma)) {
476                         dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
477                                         DMA_BIDIRECTIONAL);
478                         adapter->buffer_list_dma = DMA_ERROR_CODE;
479                 }
480                 free_page((unsigned long)adapter->buffer_list_addr);
481                 adapter->buffer_list_addr = NULL;
482         }
483
484         if(adapter->filter_list_addr != NULL) {
485                 if (!dma_mapping_error(dev, adapter->filter_list_dma)) {
486                         dma_unmap_single(dev, adapter->filter_list_dma, 4096,
487                                         DMA_BIDIRECTIONAL);
488                         adapter->filter_list_dma = DMA_ERROR_CODE;
489                 }
490                 free_page((unsigned long)adapter->filter_list_addr);
491                 adapter->filter_list_addr = NULL;
492         }
493
494         if(adapter->rx_queue.queue_addr != NULL) {
495                 if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) {
496                         dma_unmap_single(dev,
497                                         adapter->rx_queue.queue_dma,
498                                         adapter->rx_queue.queue_len,
499                                         DMA_BIDIRECTIONAL);
500                         adapter->rx_queue.queue_dma = DMA_ERROR_CODE;
501                 }
502                 kfree(adapter->rx_queue.queue_addr);
503                 adapter->rx_queue.queue_addr = NULL;
504         }
505
506         for(i = 0; i<IbmVethNumBufferPools; i++)
507                 if (adapter->rx_buff_pool[i].active)
508                         ibmveth_free_buffer_pool(adapter,
509                                                  &adapter->rx_buff_pool[i]);
510
511         if (adapter->bounce_buffer != NULL) {
512                 if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
513                         dma_unmap_single(&adapter->vdev->dev,
514                                         adapter->bounce_buffer_dma,
515                                         adapter->netdev->mtu + IBMVETH_BUFF_OH,
516                                         DMA_BIDIRECTIONAL);
517                         adapter->bounce_buffer_dma = DMA_ERROR_CODE;
518                 }
519                 kfree(adapter->bounce_buffer);
520                 adapter->bounce_buffer = NULL;
521         }
522 }
523
524 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
525         union ibmveth_buf_desc rxq_desc, u64 mac_address)
526 {
527         int rc, try_again = 1;
528
529         /* After a kexec the adapter will still be open, so our attempt to
530          * open it will fail. So if we get a failure we free the adapter and
531          * try again, but only once. */
532 retry:
533         rc = h_register_logical_lan(adapter->vdev->unit_address,
534                                     adapter->buffer_list_dma, rxq_desc.desc,
535                                     adapter->filter_list_dma, mac_address);
536
537         if (rc != H_SUCCESS && try_again) {
538                 do {
539                         rc = h_free_logical_lan(adapter->vdev->unit_address);
540                 } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
541
542                 try_again = 0;
543                 goto retry;
544         }
545
546         return rc;
547 }
548
549 static int ibmveth_open(struct net_device *netdev)
550 {
551         struct ibmveth_adapter *adapter = netdev_priv(netdev);
552         u64 mac_address = 0;
553         int rxq_entries = 1;
554         unsigned long lpar_rc;
555         int rc;
556         union ibmveth_buf_desc rxq_desc;
557         int i;
558         struct device *dev;
559
560         ibmveth_debug_printk("open starting\n");
561
562         napi_enable(&adapter->napi);
563
564         for(i = 0; i<IbmVethNumBufferPools; i++)
565                 rxq_entries += adapter->rx_buff_pool[i].size;
566
567         adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
568         adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
569
570         if(!adapter->buffer_list_addr || !adapter->filter_list_addr) {
571                 ibmveth_error_printk("unable to allocate filter or buffer list pages\n");
572                 ibmveth_cleanup(adapter);
573                 napi_disable(&adapter->napi);
574                 return -ENOMEM;
575         }
576
577         adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) * rxq_entries;
578         adapter->rx_queue.queue_addr = kmalloc(adapter->rx_queue.queue_len, GFP_KERNEL);
579
580         if(!adapter->rx_queue.queue_addr) {
581                 ibmveth_error_printk("unable to allocate rx queue pages\n");
582                 ibmveth_cleanup(adapter);
583                 napi_disable(&adapter->napi);
584                 return -ENOMEM;
585         }
586
587         dev = &adapter->vdev->dev;
588
589         adapter->buffer_list_dma = dma_map_single(dev,
590                         adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
591         adapter->filter_list_dma = dma_map_single(dev,
592                         adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
593         adapter->rx_queue.queue_dma = dma_map_single(dev,
594                         adapter->rx_queue.queue_addr,
595                         adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL);
596
597         if ((dma_mapping_error(dev, adapter->buffer_list_dma)) ||
598             (dma_mapping_error(dev, adapter->filter_list_dma)) ||
599             (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) {
600                 ibmveth_error_printk("unable to map filter or buffer list pages\n");
601                 ibmveth_cleanup(adapter);
602                 napi_disable(&adapter->napi);
603                 return -ENOMEM;
604         }
605
606         adapter->rx_queue.index = 0;
607         adapter->rx_queue.num_slots = rxq_entries;
608         adapter->rx_queue.toggle = 1;
609
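        /* on this big-endian platform the 6-byte MAC fills the high-order bytes
         * of the u64, so shift it down into the low 48 bits for the h-call */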
610         memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
611         mac_address = mac_address >> 16;
612
613         rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | adapter->rx_queue.queue_len;
614         rxq_desc.fields.address = adapter->rx_queue.queue_dma;
615
616         ibmveth_debug_printk("buffer list @ 0x%p\n", adapter->buffer_list_addr);
617         ibmveth_debug_printk("filter list @ 0x%p\n", adapter->filter_list_addr);
618         ibmveth_debug_printk("receive q   @ 0x%p\n", adapter->rx_queue.queue_addr);
619
620         h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
621
622         lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);
623
624         if(lpar_rc != H_SUCCESS) {
625                 ibmveth_error_printk("h_register_logical_lan failed with %ld\n", lpar_rc);
626                 ibmveth_error_printk("buffer TCE:0x%llx filter TCE:0x%llx rxq desc:0x%llx MAC:0x%llx\n",
627                                      adapter->buffer_list_dma,
628                                      adapter->filter_list_dma,
629                                      rxq_desc.desc,
630                                      mac_address);
631                 ibmveth_cleanup(adapter);
632                 napi_disable(&adapter->napi);
633                 return -ENONET;
634         }
635
636         for(i = 0; i<IbmVethNumBufferPools; i++) {
637                 if(!adapter->rx_buff_pool[i].active)
638                         continue;
639                 if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
640                         ibmveth_error_printk("unable to alloc pool\n");
641                         adapter->rx_buff_pool[i].active = 0;
642                         ibmveth_cleanup(adapter);
643                         napi_disable(&adapter->napi);
644                         return -ENOMEM;
645                 }
646         }
647
648         ibmveth_debug_printk("registering irq 0x%x\n", netdev->irq);
649         if((rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name, netdev)) != 0) {
650                 ibmveth_error_printk("unable to request irq 0x%x, rc %d\n", netdev->irq, rc);
651                 do {
652                         rc = h_free_logical_lan(adapter->vdev->unit_address);
653                 } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
654
655                 ibmveth_cleanup(adapter);
656                 napi_disable(&adapter->napi);
657                 return rc;
658         }
659
660         adapter->bounce_buffer =
661             kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
662         if (!adapter->bounce_buffer) {
663                 ibmveth_error_printk("unable to allocate bounce buffer\n");
664                 ibmveth_cleanup(adapter);
665                 napi_disable(&adapter->napi);
666                 return -ENOMEM;
667         }
668         adapter->bounce_buffer_dma =
669             dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
670                            netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
671         if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
672                 ibmveth_error_printk("unable to map bounce buffer\n");
673                 ibmveth_cleanup(adapter);
674                 napi_disable(&adapter->napi);
675                 return -ENOMEM;
676         }
677
678         ibmveth_debug_printk("initial replenish cycle\n");
679         ibmveth_interrupt(netdev->irq, netdev);
680
681         netif_start_queue(netdev);
682
683         ibmveth_debug_printk("open complete\n");
684
685         return 0;
686 }
687
688 static int ibmveth_close(struct net_device *netdev)
689 {
690         struct ibmveth_adapter *adapter = netdev_priv(netdev);
691         long lpar_rc;
692
693         ibmveth_debug_printk("close starting\n");
694
695         napi_disable(&adapter->napi);
696
697         if (!adapter->pool_config)
698                 netif_stop_queue(netdev);
699
700         h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
701
702         do {
703                 lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
704         } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
705
706         if(lpar_rc != H_SUCCESS)
707         {
708                 ibmveth_error_printk("h_free_logical_lan failed with %lx, continuing with close\n",
709                                      lpar_rc);
710         }
711
712         free_irq(netdev->irq, netdev);
713
714         adapter->rx_no_buffer = *(u64*)(((char*)adapter->buffer_list_addr) + 4096 - 8);
715
716         ibmveth_cleanup(adapter);
717
718         ibmveth_debug_printk("close complete\n");
719
720         return 0;
721 }
722
723 static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) {
724         cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
725         cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | ADVERTISED_FIBRE);
726         cmd->speed = SPEED_1000;
727         cmd->duplex = DUPLEX_FULL;
728         cmd->port = PORT_FIBRE;
729         cmd->phy_address = 0;
730         cmd->transceiver = XCVR_INTERNAL;
731         cmd->autoneg = AUTONEG_ENABLE;
732         cmd->maxtxpkt = 0;
733         cmd->maxrxpkt = 1;
734         return 0;
735 }
736
737 static void netdev_get_drvinfo (struct net_device *dev, struct ethtool_drvinfo *info) {
738         strncpy(info->driver, ibmveth_driver_name, sizeof(info->driver) - 1);
739         strncpy(info->version, ibmveth_driver_version, sizeof(info->version) - 1);
740 }
741
742 static u32 netdev_get_link(struct net_device *dev) {
743         return 1;
744 }
745
746 static void ibmveth_set_rx_csum_flags(struct net_device *dev, u32 data)
747 {
748         struct ibmveth_adapter *adapter = netdev_priv(dev);
749
750         if (data)
751                 adapter->rx_csum = 1;
752         else {
753                 /*
754                  * Since the ibmveth firmware interface does not have the concept of
755                  * separate tx/rx checksum offload enable, if rx checksum is disabled
756                  * we also have to disable tx checksum offload. Once we disable rx
757                  * checksum offload, we are no longer allowed to send tx buffers that
758                  * are not properly checksummed.
759                  */
760                 adapter->rx_csum = 0;
761                 dev->features &= ~NETIF_F_IP_CSUM;
762                 dev->features &= ~NETIF_F_IPV6_CSUM;
763         }
764 }
765
766 static void ibmveth_set_tx_csum_flags(struct net_device *dev, u32 data)
767 {
768         struct ibmveth_adapter *adapter = netdev_priv(dev);
769
770         if (data) {
771                 if (adapter->fw_ipv4_csum_support)
772                         dev->features |= NETIF_F_IP_CSUM;
773                 if (adapter->fw_ipv6_csum_support)
774                         dev->features |= NETIF_F_IPV6_CSUM;
775                 adapter->rx_csum = 1;
776         } else {
777                 dev->features &= ~NETIF_F_IP_CSUM;
778                 dev->features &= ~NETIF_F_IPV6_CSUM;
779         }
780 }
781
782 static int ibmveth_set_csum_offload(struct net_device *dev, u32 data,
783                                     void (*done) (struct net_device *, u32))
784 {
785         struct ibmveth_adapter *adapter = netdev_priv(dev);
786         unsigned long set_attr, clr_attr, ret_attr;
787         unsigned long set_attr6, clr_attr6;
788         long ret, ret6;
789         int rc1 = 0, rc2 = 0;
790         int restart = 0;
791
792         if (netif_running(dev)) {
793                 restart = 1;
794                 adapter->pool_config = 1;
795                 ibmveth_close(dev);
796                 adapter->pool_config = 0;
797         }
798
799         set_attr = 0;
800         clr_attr = 0;
801
802         if (data) {
803                 set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
804                 set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
805         } else {
806                 clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
807                 clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
808         }
809
810         ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
811
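        /* only change the offload setting when this is not a trunk adapter and
         * the firmware can checksum padded packets */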
812         if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) &&
813             !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) &&
814             (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
815                 ret = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
816                                          set_attr, &ret_attr);
817
818                 if (ret != H_SUCCESS) {
819                         ibmveth_error_printk("unable to change IPv4 checksum "
820                                              "offload settings. %d rc=%ld\n",
821                                              data, ret);
822
823                         ret = h_illan_attributes(adapter->vdev->unit_address,
824                                                  set_attr, clr_attr, &ret_attr);
825                 } else
826                         adapter->fw_ipv4_csum_support = data;
827
828                 ret6 = h_illan_attributes(adapter->vdev->unit_address,
829                                          clr_attr6, set_attr6, &ret_attr);
830
831                 if (ret6 != H_SUCCESS) {
832                         ibmveth_error_printk("unable to change IPv6 checksum "
833                                              "offload settings. %d rc=%ld\n",
834                                              data, ret);
835
836                         ret = h_illan_attributes(adapter->vdev->unit_address,
837                                                  set_attr6, clr_attr6,
838                                                  &ret_attr);
839                 } else
840                         adapter->fw_ipv6_csum_support = data;
841
842                 if (ret == H_SUCCESS || ret6 == H_SUCCESS)
843                         done(dev, data);
844                 else
845                         rc1 = -EIO;
846         } else {
847                 rc1 = -EIO;
848                 ibmveth_error_printk("unable to change checksum offload settings."
849                                      " %d rc=%ld ret_attr=%lx\n", data, ret, ret_attr);
850         }
851
852         if (restart)
853                 rc2 = ibmveth_open(dev);
854
855         return rc1 ? rc1 : rc2;
856 }
857
858 static int ibmveth_set_rx_csum(struct net_device *dev, u32 data)
859 {
860         struct ibmveth_adapter *adapter = netdev_priv(dev);
861
862         if ((data && adapter->rx_csum) || (!data && !adapter->rx_csum))
863                 return 0;
864
865         return ibmveth_set_csum_offload(dev, data, ibmveth_set_rx_csum_flags);
866 }
867
868 static int ibmveth_set_tx_csum(struct net_device *dev, u32 data)
869 {
870         struct ibmveth_adapter *adapter = netdev_priv(dev);
871         int rc = 0;
872
873         if (data && (dev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
874                 return 0;
875         if (!data && !(dev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
876                 return 0;
877
878         if (data && !adapter->rx_csum)
879                 rc = ibmveth_set_csum_offload(dev, data, ibmveth_set_tx_csum_flags);
880         else
881                 ibmveth_set_tx_csum_flags(dev, data);
882
883         return rc;
884 }
885
886 static u32 ibmveth_get_rx_csum(struct net_device *dev)
887 {
888         struct ibmveth_adapter *adapter = netdev_priv(dev);
889         return adapter->rx_csum;
890 }
891
892 static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
893 {
894         int i;
895
896         if (stringset != ETH_SS_STATS)
897                 return;
898
899         for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
900                 memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
901 }
902
903 static int ibmveth_get_sset_count(struct net_device *dev, int sset)
904 {
905         switch (sset) {
906         case ETH_SS_STATS:
907                 return ARRAY_SIZE(ibmveth_stats);
908         default:
909                 return -EOPNOTSUPP;
910         }
911 }
912
913 static void ibmveth_get_ethtool_stats(struct net_device *dev,
914                                       struct ethtool_stats *stats, u64 *data)
915 {
916         int i;
917         struct ibmveth_adapter *adapter = netdev_priv(dev);
918
919         for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
920                 data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
921 }
922
923 static const struct ethtool_ops netdev_ethtool_ops = {
924         .get_drvinfo            = netdev_get_drvinfo,
925         .get_settings           = netdev_get_settings,
926         .get_link               = netdev_get_link,
927         .set_tx_csum            = ibmveth_set_tx_csum,
928         .get_rx_csum            = ibmveth_get_rx_csum,
929         .set_rx_csum            = ibmveth_set_rx_csum,
930         .get_strings            = ibmveth_get_strings,
931         .get_sset_count         = ibmveth_get_sset_count,
932         .get_ethtool_stats      = ibmveth_get_ethtool_stats,
933         .set_sg                 = ethtool_op_set_sg,
934 };
935
936 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
937 {
938         return -EOPNOTSUPP;
939 }
940
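/* offset of an address within a 4K page */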
941 #define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1))
942
943 static int ibmveth_send(struct ibmveth_adapter *adapter,
944                         union ibmveth_buf_desc *descs)
945 {
946         unsigned long correlator;
947         unsigned int retry_count;
948         unsigned long ret;
949
950         /*
951          * The retry count sets a maximum for the number of broadcast and
952          * multicast destinations within the system.
953          */
954         retry_count = 1024;
955         correlator = 0;
956         do {
957                 ret = h_send_logical_lan(adapter->vdev->unit_address,
958                                              descs[0].desc, descs[1].desc,
959                                              descs[2].desc, descs[3].desc,
960                                              descs[4].desc, descs[5].desc,
961                                              correlator, &correlator);
962         } while ((ret == H_BUSY) && (retry_count--));
963
964         if (ret != H_SUCCESS && ret != H_DROPPED) {
965                 ibmveth_error_printk("tx: h_send_logical_lan failed with "
966                                      "rc=%ld\n", ret);
967                 return 1;
968         }
969
970         return 0;
971 }
972
973 static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
974                                       struct net_device *netdev)
975 {
976         struct ibmveth_adapter *adapter = netdev_priv(netdev);
977         unsigned int desc_flags;
978         union ibmveth_buf_desc descs[6];
979         int last, i;
980         int force_bounce = 0;
981
982         /*
983          * veth handles a maximum of 6 segments including the header, so
984          * we have to linearize the skb if there are more than this.
985          */
986         if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
987                 netdev->stats.tx_dropped++;
988                 goto out;
989         }
990
991         /* veth can't checksum offload UDP */
992         if (skb->ip_summed == CHECKSUM_PARTIAL &&
993             ((skb->protocol == htons(ETH_P_IP) &&
994               ip_hdr(skb)->protocol != IPPROTO_TCP) ||
995              (skb->protocol == htons(ETH_P_IPV6) &&
996               ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
997             skb_checksum_help(skb)) {
998
999                 ibmveth_error_printk("tx: failed to checksum packet\n");
1000                 netdev->stats.tx_dropped++;
1001                 goto out;
1002         }
1003
1004         desc_flags = IBMVETH_BUF_VALID;
1005
1006         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1007                 unsigned char *buf = skb_transport_header(skb) +
1008                                                 skb->csum_offset;
1009
1010                 desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);
1011
1012                 /* Need to zero out the checksum */
1013                 buf[0] = 0;
1014                 buf[1] = 0;
1015         }
1016
1017 retry_bounce:
1018         memset(descs, 0, sizeof(descs));
1019
1020         /*
1021          * If a linear packet is below the rx threshold then
1022          * copy it into the static bounce buffer. This avoids the
1023          * cost of a TCE insert and remove.
1024          */
1025         if (force_bounce || (!skb_is_nonlinear(skb) &&
1026                                 (skb->len < tx_copybreak))) {
1027                 skb_copy_from_linear_data(skb, adapter->bounce_buffer,
1028                                           skb->len);
1029
1030                 descs[0].fields.flags_len = desc_flags | skb->len;
1031                 descs[0].fields.address = adapter->bounce_buffer_dma;
1032
1033                 if (ibmveth_send(adapter, descs)) {
1034                         adapter->tx_send_failed++;
1035                         netdev->stats.tx_dropped++;
1036                 } else {
1037                         netdev->stats.tx_packets++;
1038                         netdev->stats.tx_bytes += skb->len;
1039                 }
1040
1041                 goto out;
1042         }
1043
1044         /* Map the header */
1045         descs[0].fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
1046                                                  skb_headlen(skb),
1047                                                  DMA_TO_DEVICE);
1048         if (dma_mapping_error(&adapter->vdev->dev, descs[0].fields.address))
1049                 goto map_failed;
1050
1051         descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
1052
1053         /* Map the frags */
1054         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1055                 unsigned long dma_addr;
1056                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1057
1058                 dma_addr = dma_map_page(&adapter->vdev->dev, frag->page,
1059                                         frag->page_offset, frag->size,
1060                                         DMA_TO_DEVICE);
1061
1062                 if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
1063                         goto map_failed_frags;
1064
1065                 descs[i+1].fields.flags_len = desc_flags | frag->size;
1066                 descs[i+1].fields.address = dma_addr;
1067         }
1068
1069         if (ibmveth_send(adapter, descs)) {
1070                 adapter->tx_send_failed++;
1071                 netdev->stats.tx_dropped++;
1072         } else {
1073                 netdev->stats.tx_packets++;
1074                 netdev->stats.tx_bytes += skb->len;
1075         }
1076
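        /* h_send_logical_lan has consumed (or dropped) the data, so the header
         * and fragment DMA mappings can be released immediately */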
1077         for (i = 0; i < skb_shinfo(skb)->nr_frags + 1; i++)
1078                 dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
1079                                descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
1080                                DMA_TO_DEVICE);
1081
1082 out:
1083         dev_kfree_skb(skb);
1084         return NETDEV_TX_OK;
1085
1086 map_failed_frags:
1087         last = i+1;
1088         for (i = 0; i < last; i++)
1089                 dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
1090                                descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
1091                                DMA_TO_DEVICE);
1092
1093 map_failed:
1094         if (!firmware_has_feature(FW_FEATURE_CMO))
1095                 ibmveth_error_printk("tx: unable to map xmit buffer\n");
1096         adapter->tx_map_failed++;
1097         skb_linearize(skb);
1098         force_bounce = 1;
1099         goto retry_bounce;
1100 }
1101
1102 static int ibmveth_poll(struct napi_struct *napi, int budget)
1103 {
1104         struct ibmveth_adapter *adapter = container_of(napi, struct ibmveth_adapter, napi);
1105         struct net_device *netdev = adapter->netdev;
1106         int frames_processed = 0;
1107         unsigned long lpar_rc;
1108
1109  restart_poll:
1110         do {
1111                 if (!ibmveth_rxq_pending_buffer(adapter))
1112                         break;
1113
1114                 smp_rmb();
1115                 if (!ibmveth_rxq_buffer_valid(adapter)) {
1116                         wmb(); /* suggested by larson1 */
1117                         adapter->rx_invalid_buffer++;
1118                         ibmveth_debug_printk("recycling invalid buffer\n");
1119                         ibmveth_rxq_recycle_buffer(adapter);
1120                 } else {
1121                         struct sk_buff *skb, *new_skb;
1122                         int length = ibmveth_rxq_frame_length(adapter);
1123                         int offset = ibmveth_rxq_frame_offset(adapter);
1124                         int csum_good = ibmveth_rxq_csum_good(adapter);
1125
1126                         skb = ibmveth_rxq_get_buffer(adapter);
1127
1128                         new_skb = NULL;
1129                         if (length < rx_copybreak)
1130                                 new_skb = netdev_alloc_skb(netdev, length);
1131
1132                         if (new_skb) {
1133                                 skb_copy_to_linear_data(new_skb,
1134                                                         skb->data + offset,
1135                                                         length);
1136                                 if (rx_flush)
1137                                         ibmveth_flush_buffer(skb->data,
1138                                                 length + offset);
1139                                 skb = new_skb;
1140                                 ibmveth_rxq_recycle_buffer(adapter);
1141                         } else {
1142                                 ibmveth_rxq_harvest_buffer(adapter);
1143                                 skb_reserve(skb, offset);
1144                         }
1145
1146                         skb_put(skb, length);
1147                         skb->protocol = eth_type_trans(skb, netdev);
1148
1149                         if (csum_good)
1150                                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1151
1152                         netif_receive_skb(skb); /* send it up */
1153
1154                         netdev->stats.rx_packets++;
1155                         netdev->stats.rx_bytes += length;
1156                         frames_processed++;
1157                 }
1158         } while (frames_processed < budget);
1159
1160         ibmveth_replenish_task(adapter);
1161
1162         if (frames_processed < budget) {
1163                 /* We think we are done - reenable interrupts,
1164                  * then check once more to make sure we are done.
1165                  */
1166                 lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1167                                        VIO_IRQ_ENABLE);
1168
1169                 ibmveth_assert(lpar_rc == H_SUCCESS);
1170
1171                 napi_complete(napi);
1172
1173                 if (ibmveth_rxq_pending_buffer(adapter) &&
1174                     napi_reschedule(napi)) {
1175                         lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1176                                                VIO_IRQ_DISABLE);
1177                         goto restart_poll;
1178                 }
1179         }
1180
1181         return frames_processed;
1182 }
1183
1184 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
1185 {
1186         struct net_device *netdev = dev_instance;
1187         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1188         unsigned long lpar_rc;
1189
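        /* disable further interrupts from the hypervisor and hand the work off
         * to the NAPI poll routine */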
1190         if (napi_schedule_prep(&adapter->napi)) {
1191                 lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1192                                        VIO_IRQ_DISABLE);
1193                 ibmveth_assert(lpar_rc == H_SUCCESS);
1194                 __napi_schedule(&adapter->napi);
1195         }
1196         return IRQ_HANDLED;
1197 }
1198
1199 static void ibmveth_set_multicast_list(struct net_device *netdev)
1200 {
1201         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1202         unsigned long lpar_rc;
1203
1204         if ((netdev->flags & IFF_PROMISC) ||
1205             (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
1206                 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1207                                            IbmVethMcastEnableRecv |
1208                                            IbmVethMcastDisableFiltering,
1209                                            0);
1210                 if(lpar_rc != H_SUCCESS) {
1211                         ibmveth_error_printk("h_multicast_ctrl rc=%ld when entering promisc mode\n", lpar_rc);
1212                 }
1213         } else {
1214                 struct netdev_hw_addr *ha;
1215                 /* clear the filter table & disable filtering */
1216                 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1217                                            IbmVethMcastEnableRecv |
1218                                            IbmVethMcastDisableFiltering |
1219                                            IbmVethMcastClearFilterTable,
1220                                            0);
1221                 if(lpar_rc != H_SUCCESS) {
1222                         ibmveth_error_printk("h_multicast_ctrl rc=%ld when attempting to clear filter table\n", lpar_rc);
1223                 }
1224                 /* add the addresses to the filter table */
1225                 netdev_for_each_mc_addr(ha, netdev) {
1226                         /* add the multicast address to the filter table */
1227                         unsigned long mcast_addr = 0;
1228                         memcpy(((char *)&mcast_addr)+2, ha->addr, 6);
1229                         lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1230                                                    IbmVethMcastAddFilter,
1231                                                    mcast_addr);
1232                         if(lpar_rc != H_SUCCESS) {
1233                                 ibmveth_error_printk("h_multicast_ctrl rc=%ld when adding an entry to the filter table\n", lpar_rc);
1234                         }
1235                 }
1236
1237                 /* re-enable filtering */
1238                 lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1239                                            IbmVethMcastEnableFiltering,
1240                                            0);
1241                 if(lpar_rc != H_SUCCESS) {
1242                         ibmveth_error_printk("h_multicast_ctrl rc=%ld when enabling filtering\n", lpar_rc);
1243                 }
1244         }
1245 }
1246
1247 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1248 {
1249         struct ibmveth_adapter *adapter = netdev_priv(dev);
1250         struct vio_dev *viodev = adapter->vdev;
1251         int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
1252         int i, rc;
1253         int need_restart = 0;
1254
1255         if (new_mtu < IBMVETH_MAX_MTU)
1256                 return -EINVAL;
1257
1258         for (i = 0; i < IbmVethNumBufferPools; i++)
1259                 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size)
1260                         break;
1261
1262         if (i == IbmVethNumBufferPools)
1263                 return -EINVAL;
1264
1265         /* Deactivate all the buffer pools so that the next loop can activate
1266            only the buffer pools necessary to hold the new MTU */
1267         if (netif_running(adapter->netdev)) {
1268                 need_restart = 1;
1269                 adapter->pool_config = 1;
1270                 ibmveth_close(adapter->netdev);
1271                 adapter->pool_config = 0;
1272         }
1273
1274         /* Look for an active buffer pool that can hold the new MTU */
1275         for(i = 0; i<IbmVethNumBufferPools; i++) {
1276                 adapter->rx_buff_pool[i].active = 1;
1277
1278                 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
1279                         dev->mtu = new_mtu;
1280                         vio_cmo_set_dev_desired(viodev,
1281                                                 ibmveth_get_desired_dma
1282                                                 (viodev));
1283                         if (need_restart) {
1284                                 return ibmveth_open(adapter->netdev);
1285                         }
1286                         return 0;
1287                 }
1288         }
1289
1290         if (need_restart && (rc = ibmveth_open(adapter->netdev)))
1291                 return rc;
1292
1293         return -EINVAL;
1294 }
1295
1296 #ifdef CONFIG_NET_POLL_CONTROLLER
1297 static void ibmveth_poll_controller(struct net_device *dev)
1298 {
1299         ibmveth_replenish_task(netdev_priv(dev));
1300         ibmveth_interrupt(dev->irq, dev);
1301 }
1302 #endif
1303
1304 /**
1305  * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1306  *
1307  * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1308  *
1309  * Return value:
1310  *      Number of bytes of IO data the driver will need to perform well.
1311  */
1312 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
1313 {
1314         struct net_device *netdev = dev_get_drvdata(&vdev->dev);
1315         struct ibmveth_adapter *adapter;
1316         unsigned long ret;
1317         int i;
1318         int rxqentries = 1;
1319
1320         /* netdev inits at probe time along with the structures we need below */
1321         if (netdev == NULL)
1322                 return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
1323
1324         adapter = netdev_priv(netdev);
1325
1326         ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
1327         ret += IOMMU_PAGE_ALIGN(netdev->mtu);
1328
1329         for (i = 0; i < IbmVethNumBufferPools; i++) {
1330                 /* add the size of the active receive buffers */
1331                 if (adapter->rx_buff_pool[i].active)
1332                         ret +=
1333                             adapter->rx_buff_pool[i].size *
1334                             IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
1335                                     buff_size);
1336                 rxqentries += adapter->rx_buff_pool[i].size;
1337         }
1338         /* add the size of the receive queue entries */
1339         ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
1340
1341         return ret;
1342 }
1343
1344 static const struct net_device_ops ibmveth_netdev_ops = {
1345         .ndo_open               = ibmveth_open,
1346         .ndo_stop               = ibmveth_close,
1347         .ndo_start_xmit         = ibmveth_start_xmit,
1348         .ndo_set_multicast_list = ibmveth_set_multicast_list,
1349         .ndo_do_ioctl           = ibmveth_ioctl,
1350         .ndo_change_mtu         = ibmveth_change_mtu,
1351         .ndo_validate_addr      = eth_validate_addr,
1352         .ndo_set_mac_address    = eth_mac_addr,
1353 #ifdef CONFIG_NET_POLL_CONTROLLER
1354         .ndo_poll_controller    = ibmveth_poll_controller,
1355 #endif
1356 };
1357
1358 static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
1359 {
1360         int rc, i;
1361         struct net_device *netdev;
1362         struct ibmveth_adapter *adapter;
1363
1364         unsigned char *mac_addr_p;
1365         unsigned int *mcastFilterSize_p;
1366
1367
1368         ibmveth_debug_printk_no_adapter("entering ibmveth_probe for UA 0x%x\n",
1369                                         dev->unit_address);
1370
1371         mac_addr_p = (unsigned char *) vio_get_attribute(dev,
1372                                                 VETH_MAC_ADDR, NULL);
1373         if (!mac_addr_p) {
1374                 printk(KERN_ERR "(%s:%3.3d) ERROR: Can't find VETH_MAC_ADDR "
1375                                 "attribute\n", __FILE__, __LINE__);
1376                 return -EINVAL; /* fail the probe; there is no usable MAC */
1377         }
1378
1379         mcastFilterSize_p = (unsigned int *) vio_get_attribute(dev,
1380                                                 VETH_MCAST_FILTER_SIZE, NULL);
1381         if (!mcastFilterSize_p) {
1382                 printk(KERN_ERR "(%s:%3.3d) ERROR: Can't find "
1383                                 "VETH_MCAST_FILTER_SIZE attribute\n",
1384                                 __FILE__, __LINE__);
1385                 return -EINVAL; /* likewise, do not report a successful probe */
1386         }
1387
1388         netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));
1389
1390         if (!netdev)
1391                 return -ENOMEM;
1392
1393         adapter = netdev_priv(netdev);
1394         dev_set_drvdata(&dev->dev, netdev);
1395
1396         adapter->vdev = dev;
1397         adapter->netdev = netdev;
1398         adapter->mcastFilterSize = *mcastFilterSize_p;
1399         adapter->pool_config = 0;
1400
1401         netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
1402
1403         /* Some older boxes running PHYP non-natively have an OF that
1404          * returns an 8-byte local-mac-address field (whose first two
1405          * bytes must be ignored), while newer boxes' OF returns a 6-byte
1406          * field.  Note that IEEE 1275 specifies that local-mac-address
1407          * must be a 6-byte field.  The RPA doc specifies that the
1408          * low-order bits of the first byte must be 10b, so we just look
1409          * for that pattern to tell the 8-byte and 6-byte cases apart. */
1410
1411         if ((*mac_addr_p & 0x3) != 0x02)
1412                 mac_addr_p += 2;
1413
1414         adapter->mac_addr = 0;
1415         memcpy(&adapter->mac_addr, mac_addr_p, 6);
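        /*
         * Illustration with made-up bytes: an older OF might hand back
         * 00 00 02 AB CD EF 12 34, where the usable MAC starts at offset 2,
         * while a compliant OF hands back 02 AB CD EF 12 34 directly.  Only
         * the compliant form has 10b in the low-order bits of its first
         * byte, so the test above skips the two pad bytes exactly when that
         * pattern is absent.
         */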
1416
1417         netdev->irq = dev->irq;
1418         netdev->netdev_ops = &ibmveth_netdev_ops;
1419         netdev->ethtool_ops = &netdev_ethtool_ops;
1420         SET_NETDEV_DEV(netdev, &dev->dev);
1421         netdev->features |= NETIF_F_SG;
1422
1423         memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
1424
1425         for (i = 0; i < IbmVethNumBufferPools; i++) {
1426                 struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
1427                 int error;
1428
1429                 ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
1430                                          pool_count[i], pool_size[i],
1431                                          pool_active[i]);
1432                 error = kobject_init_and_add(kobj, &ktype_veth_pool,
1433                                              &dev->dev.kobj, "pool%d", i);
1434                 if (!error)
1435                         kobject_uevent(kobj, KOBJ_ADD);
1436         }
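        /*
         * The loop above gives every receive pool its own directory (pool0,
         * pool1, ...) under the vio device's sysfs node, each exposing the
         * active, num and size attributes defined near the end of this file.
         */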
1437
1438         ibmveth_debug_printk("adapter @ 0x%p\n", adapter);
1439
1440         adapter->buffer_list_dma = DMA_ERROR_CODE;
1441         adapter->filter_list_dma = DMA_ERROR_CODE;
1442         adapter->rx_queue.queue_dma = DMA_ERROR_CODE;
1443
1444         ibmveth_debug_printk("registering netdev...\n");
1445
1446         ibmveth_set_csum_offload(netdev, 1, ibmveth_set_tx_csum_flags);
1447
1448         rc = register_netdev(netdev);
1449
1450         if (rc) {
1451                 ibmveth_debug_printk("failed to register netdev rc=%d\n", rc);
1452                 free_netdev(netdev);
1453                 return rc;
1454         }
1455
1456         ibmveth_debug_printk("registered\n");
1457
1458         return 0;
1459 }
1460
1461 static int __devexit ibmveth_remove(struct vio_dev *dev)
1462 {
1463         struct net_device *netdev = dev_get_drvdata(&dev->dev);
1464         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1465         int i;
1466
1467         for (i = 0; i < IbmVethNumBufferPools; i++)
1468                 kobject_put(&adapter->rx_buff_pool[i].kobj);
1469
1470         unregister_netdev(netdev);
1471
1472         free_netdev(netdev);
1473         dev_set_drvdata(&dev->dev, NULL);
1474
1475         return 0;
1476 }
1477
1478 static struct attribute veth_active_attr;
1479 static struct attribute veth_num_attr;
1480 static struct attribute veth_size_attr;
1481
1482 static ssize_t veth_pool_show(struct kobject *kobj,
1483                               struct attribute *attr, char *buf)
1484 {
1485         struct ibmveth_buff_pool *pool = container_of(kobj,
1486                                                       struct ibmveth_buff_pool,
1487                                                       kobj);
1488
1489         if (attr == &veth_active_attr)
1490                 return sprintf(buf, "%d\n", pool->active);
1491         else if (attr == &veth_num_attr)
1492                 return sprintf(buf, "%d\n", pool->size);
1493         else if (attr == &veth_size_attr)
1494                 return sprintf(buf, "%d\n", pool->buff_size);
1495         return 0;
1496 }
1497
1498 static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
1499                                const char *buf, size_t count)
1500 {
1501         struct ibmveth_buff_pool *pool = container_of(kobj,
1502                                                       struct ibmveth_buff_pool,
1503                                                       kobj);
1504         struct net_device *netdev = dev_get_drvdata(
1505             container_of(kobj->parent, struct device, kobj));
1506         struct ibmveth_adapter *adapter = netdev_priv(netdev);
1507         long value = simple_strtol(buf, NULL, 10);
1508         long rc;
1509
1510         if (attr == &veth_active_attr) {
1511                 if (value && !pool->active) {
1512                         if (netif_running(netdev)) {
1513                                 if (ibmveth_alloc_buffer_pool(pool)) {
1514                                         ibmveth_error_printk("unable to alloc pool\n");
1515                                         return -ENOMEM;
1516                                 }
1517                                 pool->active = 1;
1518                                 adapter->pool_config = 1;
1519                                 ibmveth_close(netdev);
1520                                 adapter->pool_config = 0;
1521                                 if ((rc = ibmveth_open(netdev)))
1522                                         return rc;
1523                         } else
1524                                 pool->active = 1;
1525                 } else if (!value && pool->active) {
1526                         int mtu = netdev->mtu + IBMVETH_BUFF_OH;
1527                         int i;
1528                         /* Before disabling this pool, make sure some other
1529                          * active pool can still hold a packet of the current MTU */
1530                         for (i = 0; i < IbmVethNumBufferPools; i++) {
1531                                 if (pool == &adapter->rx_buff_pool[i])
1532                                         continue;
1533                                 if (!adapter->rx_buff_pool[i].active)
1534                                         continue;
1535                                 if (mtu <= adapter->rx_buff_pool[i].buff_size)
1536                                         break;
1537                         }
1538
1539                         if (i == IbmVethNumBufferPools) {
1540                                 ibmveth_error_printk("no active pool >= MTU\n");
1541                                 return -EPERM;
1542                         }
1543
1544                         if (netif_running(netdev)) {
1545                                 adapter->pool_config = 1;
1546                                 ibmveth_close(netdev);
1547                                 pool->active = 0;
1548                                 adapter->pool_config = 0;
1549                                 if ((rc = ibmveth_open(netdev)))
1550                                         return rc;
1551                         }
1552                         pool->active = 0;
1553                 }
1554         } else if (attr == &veth_num_attr) {
1555                 if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
1556                         return -EINVAL;
1557                 else {
1558                         if (netif_running(netdev)) {
1559                                 adapter->pool_config = 1;
1560                                 ibmveth_close(netdev);
1561                                 adapter->pool_config = 0;
1562                                 pool->size = value;
1563                                 if ((rc = ibmveth_open(netdev)))
1564                                         return rc;
1565                         } else
1566                                 pool->size = value;
1567                 }
1568         } else if (attr == &veth_size_attr) {
1569                 if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE)
1570                         return -EINVAL;
1571                 else {
1572                         if (netif_running(netdev)) {
1573                                 adapter->pool_config = 1;
1574                                 ibmveth_close(netdev);
1575                                 adapter->pool_config = 0;
1576                                 pool->buff_size = value;
1577                                 if ((rc = ibmveth_open(netdev)))
1578                                         return rc;
1579                         } else
1580                                 pool->buff_size = value;
1581                 }
1582         }
1583
1584         /* kick the interrupt handler to allocate/deallocate pools */
1585         ibmveth_interrupt(netdev->irq, netdev);
1586         return count;
1587 }
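/*
 * Sketch of how the attributes above might be used from userspace (the
 * sysfs path depends on where the vio device sits, so the one shown here
 * is only an assumed example):
 *
 *   echo 8192 > /sys/devices/vio/30000002/pool1/size
 *   echo 768  > /sys/devices/vio/30000002/pool1/num
 *   echo 1    > /sys/devices/vio/30000002/pool1/active
 *
 * When the interface is running, each write closes and reopens the device
 * so the new pool geometry takes effect immediately.
 */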
1588
1589
1590 #define ATTR(_name, _mode)      \
1591         struct attribute veth_##_name##_attr = {               \
1592         .name = __stringify(_name), .mode = _mode, \
1593         }
1594
1595 static ATTR(active, 0644);
1596 static ATTR(num, 0644);
1597 static ATTR(size, 0644);
1598
1599 static struct attribute * veth_pool_attrs[] = {
1600         &veth_active_attr,
1601         &veth_num_attr,
1602         &veth_size_attr,
1603         NULL,
1604 };
1605
1606 static const struct sysfs_ops veth_pool_ops = {
1607         .show   = veth_pool_show,
1608         .store  = veth_pool_store,
1609 };
1610
1611 static struct kobj_type ktype_veth_pool = {
1612         .release        = NULL,
1613         .sysfs_ops      = &veth_pool_ops,
1614         .default_attrs  = veth_pool_attrs,
1615 };
1616
1617 static int ibmveth_resume(struct device *dev)
1618 {
1619         struct net_device *netdev = dev_get_drvdata(dev);
1620         ibmveth_interrupt(netdev->irq, netdev);
1621         return 0;
1622 }
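/*
 * The resume handler simply fakes an interrupt; the intent, as far as this
 * file shows, is to let the normal interrupt path re-drive the receive
 * queue once the partition is running again.
 */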
1623
1624 static struct vio_device_id ibmveth_device_table[] __devinitdata = {
1625         { "network", "IBM,l-lan"},
1626         { "", "" }
1627 };
1628 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
1629
1630 static struct dev_pm_ops ibmveth_pm_ops = {
1631         .resume = ibmveth_resume
1632 };
1633
1634 static struct vio_driver ibmveth_driver = {
1635         .id_table       = ibmveth_device_table,
1636         .probe          = ibmveth_probe,
1637         .remove         = ibmveth_remove,
1638         .get_desired_dma = ibmveth_get_desired_dma,
1639         .driver         = {
1640                 .name   = ibmveth_driver_name,
1641                 .owner  = THIS_MODULE,
1642                 .pm = &ibmveth_pm_ops,
1643         }
1644 };
1645
1646 static int __init ibmveth_module_init(void)
1647 {
1648         ibmveth_printk("%s: %s %s\n", ibmveth_driver_name, ibmveth_driver_string, ibmveth_driver_version);
1649
1650         return vio_register_driver(&ibmveth_driver);
1651 }
1652
1653 static void __exit ibmveth_module_exit(void)
1654 {
1655         vio_unregister_driver(&ibmveth_driver);
1656 }
1657
1658 module_init(ibmveth_module_init);
1659 module_exit(ibmveth_module_exit);