arch/powerpc/lib/checksum_64.S

   1 /*
   2  * This file contains assembly-language implementations
   3  * of IP-style 1's complement checksum routines.
   4  *
   5  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
   6  *
   7  *  This program is free software; you can redistribute it and/or
   8  *  modify it under the terms of the GNU General Public License
   9  *  as published by the Free Software Foundation; either version
  10  *  2 of the License, or (at your option) any later version.
  11  *
  12  * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
  13  */
  14
  15 #include <linux/sys.h>
  16 #include <asm/processor.h>
  17 #include <asm/errno.h>
  18 #include <asm/ppc_asm.h>
  19
  20 /*
  21  * ip_fast_csum(r3=buf, r4=len) -- checksum of an IP header.
  22  * len counts 32-bit words and must be >= 5.
  23  *
  24  * len is normally exactly 5 but may be larger, so the words are summed one
  25  * at a time with lwzu rather than as doublewords.  16-bit result in r3.
  26  */
  27 _GLOBAL(ip_fast_csum)
  28         lwz     r0,0(r3)        /* r0 = word 0, the running sum */
  29         lwzu    r5,4(r3)        /* r5 = word 1; r3 advances to it */
  30         subic.  r4,r4,2         /* r4 = words left; sets CR0 for blelr */
  31         addc    r0,r5,r0        /* must follow subic., which also writes CA */
  32         mtctr   r4
  33         blelr-                  /* no words left; never taken for len >= 5 */
  34 1:      lwzu    r4,4(r3)        /* next header word */
  35         adde    r0,r4,r0
  36         bdnz    1b
  37         addze   r0,r0           /* pick up the last carry */
  38         rotldi  r4,r0,32        /* swap the 32-bit halves ... */
  39         add     r0,r4,r0        /* ... so the top half = hi + lo (+ end-around carry) */
  40         srdi    r0,r0,32        /* r0 = 32-bit sum */
  41         rotlwi  r3,r0,16        /* same trick to fold 32 bits to 16 */
  42         add     r3,r3,r0
  43         not     r3,r3           /* one's complement */
  44         srwi    r3,r3,16        /* the 16-bit result sits in the top halfword */
  45         blr
  46
  47 /*
  48  * Checksum of the TCP/UDP pseudo-header, folded to 16 bits and complemented:
  49  *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
  50  * Nothing is gained from a 64-bit specific version, but the sum of
  51  * the 32-bit inputs can spill into the upper half of the
  52  * doubleword, so the total still has to be folded down from 64 bits.
  53  */
  54 _GLOBAL(csum_tcpudp_magic)
  55         rlwimi  r5,r6,16,0,15   /* insert proto above the low 16 bits of len */
  56         addc    r0,r4,r3        /* saddr + daddr */
  57         adde    r0,r5,r0        /* + (proto << 16 | len) */
  58         adde    r0,r7,r0        /* + incoming sum */
  59         rotldi  r4,r0,32        /* fold 64 bits to 32: swap the halves, */
  60         add     r0,r0,r4        /* add them, */
  61         srdi    r0,r0,32        /* and keep the upper word */
  62         rotlwi  r3,r0,16        /* fold 32 bits to 16 the same way */
  63         add     r3,r3,r0
  64         not     r3,r3           /* one's complement */
  65         srwi    r3,r3,16        /* result is the upper halfword */
  66         blr
  67
  68 /*
  69  * Sum the len bytes at buff into the 32-bit partial checksum "sum"
  70  * and return the new 32-bit partial checksum in r3.
  71  *
  72  * buff must be at least halfword aligned; len may be any byte count.
  73  * r5 is the 64-bit accumulator; CA is carried from one add to the next.
  74  *
  75  * csum_partial(r3=buff, r4=len, r5=sum)
  76  */
  77 _GLOBAL(csum_partial)
  78         addi    r3,r3,-8        /* bias buff by -8: every load uses offset 8 */
  79         srdi.   r6,r4,3         /* r6 = len / 8 = doubleword count */
  80         addic   r5,r5,0         /* adding 0 leaves r5 alone and clears CA */
  81         beq     3f              /* fewer than 8 bytes: tail only */
  82         andi.   r0,r3,2         /* is bit 1 of the address set? */
  83         beq+    1f              /* no: go straight to the main loop */
  84         lhz     r6,8(r3)        /* yes: consume one halfword first */
  85         addi    r3,r3,2
  86         subi    r4,r4,2
  87         addc    r5,r6,r5
  88         srdi.   r6,r4,3         /* doubleword count for what is left */
  89         beq     3f              /* none: tail only */
  90 1:      mtctr   r6
  91 2:      ldu     r6,8(r3)        /* add one doubleword per iteration */
  92         adde    r5,r6,r5
  93         bdnz    2b
  94         andi.   r4,r4,7         /* r4 = bytes remaining after the loop (0-7) */
  95 3:      cmpwi   r4,4            /* tail: a whole word? */
  96         blt     4f
  97         lwz     r6,8(r3)
  98         addi    r3,r3,4
  99         subi    r4,r4,4
 100         adde    r5,r6,r5
 101 4:      cmpwi   r4,2            /* tail: a halfword? */
 102         blt+    5f
 103         lhz     r6,8(r3)
 104         addi    r3,r3,2
 105         subi    r4,r4,2
 106         adde    r5,r6,r5
 107 5:      cmpwi   r4,1            /* tail: one odd byte? */
 108         bne+    6f
 109         lbz     r6,8(r3)
 110         slwi    r6,r6,8         /* it counts as the high byte of a halfword */
 111         adde    r5,r6,r5
 112 6:      addze   r5,r5           /* last carry */
 113         rotldi  r4,r5,32        /* fold 64 bits to 32: swap halves, add, */
 114         add     r3,r5,r4
 115         srdi    r3,r3,32        /* and return the upper word */
 116         blr
 117
 118 /*
 119  * Computes the checksum of a memory block at src, length len,
 120  * and adds in "sum" (32-bit), while copying the block to dst.
 121  * If an access exception occurs on src or dst, it stores -EFAULT
 122  * to *src_err or *dst_err respectively, and (for an error on
 123  * src) zeroes the rest of dst.  The folded 32-bit sum is returned in r3.
 124  *
 125  * This code needs to be reworked to take advantage of 64 bit sum+copy.
 126  * However, due to tokenring halfword alignment problems this will be very
 127  * tricky.  For now we'll leave it until we instrument it somehow.
 128  *
 129  * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 130  */
 131 _GLOBAL(csum_partial_copy_generic)
 132         addic   r0,r6,0         /* r0 = running sum, seeded from r6; adding 0 clears CA */
 133         subi    r3,r3,4         /* bias src by -4: every load below uses offset 4 */
 134         subi    r4,r4,4         /* bias dst by -4: every store below uses offset 4 */
 135         srwi.   r6,r5,2         /* r6 = len / 4 = number of whole words */
 136         beq     3f              /* if we're doing < 4 bytes */
 137         andi.   r9,r4,2         /* Align dst to longword boundary: is bit 1 set? */
 138         beq+    1f              /* no: go straight to the word loop */
 139 81:     lhz     r6,4(r3)        /* do 2 bytes to get aligned (src load listed in __ex_table) */
 140         addi    r3,r3,2
 141         subi    r5,r5,2         /* two fewer bytes remain */
 142 91:     sth     r6,4(r4)        /* dst store listed in __ex_table */
 143         addi    r4,r4,2
 144         addc    r0,r0,r6        /* add the halfword; CA feeds the adde chain below */
 145         srwi.   r6,r5,2         /* # words to do */
 146         beq     3f              /* no whole words left: only the tail remains */
 147 1:      mtctr   r6              /* CTR = word count for the loop */
 148 82:     lwzu    r6,4(r3)        /* the bdnz has zero overhead, so it should */
 149 92:     stwu    r6,4(r4)        /* be unnecessary to unroll this loop */
 150         adde    r0,r0,r6        /* accumulate the word plus carry */
 151         bdnz    82b
 152         andi.   r5,r5,3         /* r5 = bytes left after the word loop (0-3) */
 153 3:      cmpwi   0,r5,2          /* at least a halfword left? */
 154         blt+    4f
 155 83:     lhz     r6,4(r3)        /* copy and sum the trailing halfword */
 156         addi    r3,r3,2
 157         subi    r5,r5,2
 158 93:     sth     r6,4(r4)
 159         addi    r4,r4,2
 160         adde    r0,r0,r6
 161 4:      cmpwi   0,r5,1          /* exactly one byte left? */
 162         bne+    5f
 163 84:     lbz     r6,4(r3)        /* copy the final odd byte */
 164 94:     stb     r6,4(r4)
 165         slwi    r6,r6,8         /* summed as the upper byte of a halfword */
 166         adde    r0,r0,r6
 167 5:      addze   r3,r0           /* add in final carry (unlikely with 64-bit regs) */
 168         rldicl  r4,r3,32,0      /* fold 64 bit value */
 169         add     r3,r4,r3
 170         srdi    r3,r3,32        /* r3 = folded 32-bit sum (return value) */
 171         blr
 172
 173 /* Fault fixups for csum_partial_copy_generic.  These shouldn't go in the fixup
 174    section, since that would cause the ex_table addresses to get out of order. */
 175
 176         .globl src_error_1
 177 src_error_1:                    /* fixup for the src load at 81 (alignment halfword) */
 178         li      r6,0            /* zero is stored in place of the data that could not be read */
 179         subi    r5,r5,2         /* account for the halfword zeroed at 95 */
 180 95:     sth     r6,4(r4)
 181         addi    r4,r4,2
 182         srwi.   r6,r5,2         /* r6 = whole words still to zero */
 183         beq     3f              /* none: only trailing bytes remain */
 184         mtctr   r6
 185         .globl src_error_2
 186 src_error_2:                    /* fixup for the src load at 82; CTR = words not yet stored */
 187         li      r6,0
 188 96:     stwu    r6,4(r4)        /* zero the remaining whole words of dst */
 189         bdnz    96b
 190 3:      andi.   r5,r5,3         /* r5 = trailing bytes after the words */
 191         beq     src_error       /* nothing more to zero */
 192         .globl src_error_3
 193 src_error_3:                    /* fixup for the src loads at 83 and 84; r5 = bytes left */
 194         li      r6,0
 195         mtctr   r5
 196         addi    r4,r4,3         /* r4 is biased by -4, so stbu's +1 now hits the next dst byte */
 197 97:     stbu    r6,1(r4)        /* zero dst one byte at a time */
 198         bdnz    97b
 199         .globl src_error
 200 src_error:
 201         cmpdi   0,r7,0          /* src_err pointer NULL? */
 202         beq     1f              /* yes: nowhere to report the error */
 203         li      r6,-EFAULT
 204         stw     r6,0(r7)        /* *src_err = -EFAULT (32-bit store) */
 205 1:      addze   r3,r0           /* return sum so far + CA; NOTE(review): not folded to 32 bits on this path -- confirm callers ignore it */
 206         blr
 207
 208         .globl dst_error
 209 dst_error:                      /* fixup for every dst store listed in __ex_table (91-97) */
 210         cmpdi   0,r8,0          /* dst_err pointer NULL? */
 211         beq     1f              /* yes: nowhere to report the error */
 212         li      r6,-EFAULT
 213         stw     r6,0(r8)        /* *dst_err = -EFAULT (32-bit store) */
 214 1:      addze   r3,r0           /* return sum so far + CA, without the 64->32 fold */
 215         blr
 216
 217 .section __ex_table,"a"         /* each entry: address of an instruction that may fault, address of its fixup */
 218         .align  3               /* 8-byte alignment for the .llong entries */
 219         .llong  81b,src_error_1 /* src load, alignment halfword */
 220         .llong  91b,dst_error   /* dst store, alignment halfword */
 221         .llong  82b,src_error_2 /* src load, word loop */
 222         .llong  92b,dst_error   /* dst store, word loop */
 223         .llong  83b,src_error_3 /* src load, trailing halfword */
 224         .llong  93b,dst_error   /* dst store, trailing halfword */
 225         .llong  84b,src_error_3 /* src load, trailing byte */
 226         .llong  94b,dst_error   /* dst store, trailing byte */
 227         .llong  95b,dst_error   /* zeroing store inside src_error_1 */
 228         .llong  96b,dst_error   /* zeroing store inside src_error_2 */
 229         .llong  97b,dst_error   /* zeroing store inside src_error_3 */