Linux-2.6.12-rc2
[linux-2.6.git] / arch / arm / lib / csumpartialcopygeneric.S
1 /*
2  *  linux/arch/arm/lib/csumpartialcopygeneric.S
3  *
4  *  Copyright (C) 1995-2001 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * unsigned int
13  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
14  *  r0 = src, r1 = dst, r2 = len, r3 = sum
15  *  Returns : r0 = checksum
16  *
17  * Note that 'tst' and 'teq' preserve the carry flag.
18  */
19
20 src     .req    r0
21 dst     .req    r1
22 len     .req    r2
23 sum     .req    r3
24
/*
 * .zero: exit used when there is nothing to copy.  Reached from .less8
 * when len is 0; no byte is read or written and the incoming sum is
 * handed back unchanged in r0.
 *
 * load_regs is a macro supplied outside this fragment -- presumably it
 * restores what save_regs stored and returns to the caller (TODO confirm
 * against the including file).
 */
25 .zero:          mov     r0, sum                 @ result = sum as passed in
26                 load_regs       ea
27
28                 /*
29                  * Align an unaligned destination pointer.  We know that
30                  * we have >= 8 bytes here, so we don't need to check
31                  * the length.  Note that the source pointer hasn't been
32                  * aligned yet.
                    *
                    * Called with 'blne' from the entry point and returns through
                    * lr.  On return dst is 32-bit aligned, len has been reduced by
                    * the 1, 2 or 3 bytes that were copied, and C holds the carry
                    * out of the last checksum addition so that the caller can
                    * continue the adcs chain.  Clobbers ip and r8.
                    *
                    * load1b/load2b and put_byte_0/put_byte_1 are defined outside
                    * this fragment; presumably the loads fetch one/two bytes from
                    * src and advance it, and put_byte_N shifts a byte into the
                    * even/odd byte lane (TODO confirm).  When dst starts on an odd
                    * address the first byte is summed in the put_byte_1 lane; the
                    * rotate at .done compensates for that.
33                  */
34 .dst_unaligned: tst     dst, #1                 @ odd address?
35                 beq     .dst_16bit              @ no: dst is 16-bit but not 32-bit aligned
36
37                 load1b  ip
38                 sub     len, len, #1
39                 adcs    sum, sum, ip, put_byte_1        @ update checksum
40                 strb    ip, [dst], #1           @ dst is now 16-bit aligned
41                 tst     dst, #2                 @ tst leaves C from the adcs intact
42                 moveq   pc, lr                  @ dst is now 32bit aligned
43
44 .dst_16bit:     load2b  r8, ip                  @ two more bytes reach the word boundary
45                 sub     len, len, #2
46                 adcs    sum, sum, r8, put_byte_0
47                 strb    r8, [dst], #1
48                 adcs    sum, sum, ip, put_byte_1
49                 strb    ip, [dst], #1
50                 mov     pc, lr                  @ dst is now 32bit aligned
51
52                 /*
53                  * Handle 0 to 7 bytes, with any alignment of source and
54                  * destination pointers.  Note that when we get here, C = 0
                    * (the only way in is the 'blo' that follows 'cmp len, #8' at
                    * the entry point, and blo is taken only with the carry clear).
                    *
                    * Data is written with byte stores only, so no word alignment
                    * is needed.  The order is: an optional single byte to make
                    * dst 16-bit aligned, then byte pairs while bits 1-2 of len
                    * are set, then an optional final byte.  len == 0 leaves via
                    * .zero, everything else via .done.  Clobbers ip and r8.
                    *
                    * load1b/load2b and put_byte_0/put_byte_1 come from outside
                    * this fragment (presumably byte loads from src and byte-lane
                    * shifts -- TODO confirm).
55                  */
56 .less8:         teq     len, #0                 @ check for zero count
57                 beq     .zero
58
59                 /* we must have at least one byte. */
60                 tst     dst, #1                 @ dst 16-bit aligned
61                 beq     .less8_aligned
62
63                 /* Align dst */
64                 load1b  ip
65                 sub     len, len, #1
66                 adcs    sum, sum, ip, put_byte_1        @ update checksum
67                 strb    ip, [dst], #1
68                 tst     len, #6                 @ any byte pair left?
69                 beq     .less8_byteonly
70
71 1:              load2b  r8, ip                  @ copy and sum one byte pair
72                 sub     len, len, #2
73                 adcs    sum, sum, r8, put_byte_0
74                 strb    r8, [dst], #1
75                 adcs    sum, sum, ip, put_byte_1
76                 strb    ip, [dst], #1
77 .less8_aligned: tst     len, #6                 @ len < 8, so bits 1-2 set means >= 2 bytes left
78                 bne     1b
79 .less8_byteonly:
80                 tst     len, #1                 @ one trailing byte?
81                 beq     .done
82                 load1b  r8
83                 adcs    sum, sum, r8, put_byte_0        @ update checksum
84                 strb    r8, [dst], #1
85                 b       .done
86
/*
 * Entry point.  FN_ENTRY, save_regs, load_regs, the loadN macros and the
 * get_byte_N / put_byte_N / push / pull operators are not defined in
 * this fragment; whatever includes it has to supply them.
 *
 *   In:   r0 = src, r1 = dst, r2 = len, r3 = sum
 *   Out:  r0 = checksum
 *
 * Outline:
 *   - len < 8 is handled entirely by .less8.
 *   - otherwise C is cleared and, when dst is not word aligned,
 *     .dst_unaligned copies 1-3 bytes so that dst becomes aligned.
 *   - a source that is not word aligned continues in .src_not_aligned;
 *     the code that follows here handles src and dst both aligned:
 *     16 bytes per loop pass, then an optional 8, an optional 4 and
 *     finally 0-3 trailing bytes.
 *   - .exit and .done form the common tail.  .done is also reached from
 *     .less8, and both labels are reached from the unaligned-source
 *     variants.
 *
 * The carry flag is live from one adcs to the next throughout; the
 * instructions in between (sub without S, tst/teq, ands/bics with small
 * immediates, stores) leave C alone.  This assumes the load macros do
 * not touch the flags either (TODO confirm).  Scratch: ip, r4-r7.
 */
87 FN_ENTRY
88                 mov     ip, sp                  @ remember sp as it was on entry
89                 save_regs
90                 sub     fp, ip, #4              @ fp = ip - 4
91
92                 cmp     len, #8                 @ Ensure that we have at least
93                 blo     .less8                  @ 8 bytes to copy.
94
95                 adds    sum, sum, #0            @ C = 0
96                 tst     dst, #3                 @ Test destination alignment
97                 blne    .dst_unaligned          @ align destination, return here
98
99                 /*
100                  * Ok, the dst pointer is now 32bit aligned, and we know
101                  * that we must have more than 4 bytes to copy.  Note
102                  * that C contains the carry from the dst alignment above.
103                  */
104
105                 tst     src, #3                 @ Test source alignment
106                 bne     .src_not_aligned
107
108                 /* Routine for src & dst aligned */
109
110                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
111                 beq     2f                      @ no whole 16-byte chunk
112
113 1:              load4l  r4, r5, r6, r7
114                 stmia   dst!, {r4, r5, r6, r7}  @ copy 16 bytes
115                 adcs    sum, sum, r4
116                 adcs    sum, sum, r5
117                 adcs    sum, sum, r6
118                 adcs    sum, sum, r7
119                 sub     ip, ip, #16             @ no S suffix, so C stays live
120                 teq     ip, #0
121                 bne     1b
122
123 2:              ands    ip, len, #12            @ bits 3 and 2 of len: an 8 and/or a 4 byte chunk
124                 beq     4f
125                 tst     ip, #8
126                 beq     3f                      @ bit 3 clear, so bit 2 must be set
127                 load2l  r4, r5
128                 stmia   dst!, {r4, r5}
129                 adcs    sum, sum, r4
130                 adcs    sum, sum, r5
131                 tst     ip, #4
132                 beq     4f
133
134 3:              load1l  r4
135                 str     r4, [dst], #4
136                 adcs    sum, sum, r4
137
138 4:              ands    len, len, #3            @ 0-3 trailing bytes
139                 beq     .done
140                 load1l  r4                      @ a whole word is fetched for the 1-3 bytes still needed
141                 tst     len, #2
142                 mov     r5, r4, get_byte_0      @ presumably the first trailing byte
143                 beq     .exit                   @ exactly one byte left
144                 adcs    sum, sum, r4, push #16  @ presumably sums only the first two bytes of r4
145                 strb    r5, [dst], #1
146                 mov     r5, r4, get_byte_1
147                 strb    r5, [dst], #1
148                 mov     r5, r4, get_byte_2      @ candidate third byte, used only if len is odd
149 .exit:          tst     len, #1                 @ one last odd byte waiting in r5?
150                 strneb  r5, [dst], #1
151                 andne   r5, r5, #255            @ sum only the byte that was stored
152                 adcnes  sum, sum, r5, put_byte_0
153
154                 /*
155                  * If the dst pointer was not 16-bit aligned, we
156                  * need to rotate the checksum here to get around
157                  * the inefficient byte manipulations in the
158                  * architecture independent code.
159                  */
160 .done:          adc     r0, sum, #0             @ fold the last carry into the result
161                 ldr     sum, [sp, #0]           @ dst as saved on entry (stack layout comes from save_regs)
162                 tst     sum, #1                 @ did dst start on an odd address?
163                 movne   r0, r0, ror #8          @ yes: swap the byte lanes back
164                 load_regs       ea
165
/*
 * Source not word aligned (dst already is).  src is rounded down to a
 * word boundary and the word containing the first wanted byte is loaded
 * into r5; the original low two bits of src (1, 2 or 3 -- never 0 on
 * this path) then select the variant.  Offsets 2 and 3 branch away,
 * offset 1 is handled here.
 *
 * r4 carries the not yet written part of the last source word.  Each
 * output word is r4 combined with the leading part of the next source
 * word, after which r4 is refilled from that word.  'pull' and 'push'
 * are shift operators defined outside this fragment (presumably an
 * endian-dependent lsr/lsl pair -- TODO confirm); with pull #8 and
 * push #24 an output word is made of three bytes of the old word and
 * one byte of the next.
 *
 * The structure mirrors the aligned routine: 16 bytes per loop pass,
 * an optional 8, an optional 4, then 0-3 trailing bytes finished by
 * .exit / .done.  Scratch: ip, r4-r8.
 */
166 .src_not_aligned:
167                 adc     sum, sum, #0            @ include C from dst alignment
168                 and     ip, src, #3             @ ip = byte offset of src within its word
169                 bic     src, src, #3            @ round src down to a word boundary
170                 load1l  r5
171                 cmp     ip, #2                  @ this overwrites C, hence the adc above
172                 beq     .src2_aligned
173                 bhi     .src3_aligned
174                 mov     r4, r5, pull #8         @ offset 1; C = 0 (the cmp of 1 with 2 cleared it)
175                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
176                 beq     2f
177 1:              load4l  r5, r6, r7, r8
178                 orr     r4, r4, r5, push #24    @ complete output word 0
179                 mov     r5, r5, pull #8
180                 orr     r5, r5, r6, push #24    @ output word 1
181                 mov     r6, r6, pull #8
182                 orr     r6, r6, r7, push #24    @ output word 2
183                 mov     r7, r7, pull #8
184                 orr     r7, r7, r8, push #24    @ output word 3
185                 stmia   dst!, {r4, r5, r6, r7}
186                 adcs    sum, sum, r4
187                 adcs    sum, sum, r5
188                 adcs    sum, sum, r6
189                 adcs    sum, sum, r7
190                 mov     r4, r8, pull #8         @ leftover for the next pass
191                 sub     ip, ip, #16
192                 teq     ip, #0
193                 bne     1b
194 2:              ands    ip, len, #12            @ an 8 and/or a 4 byte chunk left?
195                 beq     4f
196                 tst     ip, #8
197                 beq     3f
198                 load2l  r5, r6
199                 orr     r4, r4, r5, push #24
200                 mov     r5, r5, pull #8
201                 orr     r5, r5, r6, push #24
202                 stmia   dst!, {r4, r5}
203                 adcs    sum, sum, r4
204                 adcs    sum, sum, r5
205                 mov     r4, r6, pull #8
206                 tst     ip, #4
207                 beq     4f
208 3:              load1l  r5
209                 orr     r4, r4, r5, push #24
210                 str     r4, [dst], #4
211                 adcs    sum, sum, r4
212                 mov     r4, r5, pull #8
213 4:              ands    len, len, #3            @ 0-3 trailing bytes
214                 beq     .done
215                 mov     r5, r4, get_byte_0      @ the leftover in r4 covers all of them, no further load
216                 tst     len, #2
217                 beq     .exit                   @ exactly one byte left
218                 adcs    sum, sum, r4, push #16  @ presumably sums only the first two bytes of r4
219                 strb    r5, [dst], #1
220                 mov     r5, r4, get_byte_1
221                 strb    r5, [dst], #1
222                 mov     r5, r4, get_byte_2      @ candidate third byte for .exit
223                 b       .exit
224
/*
 * Source at byte offset 2 within its word.  Entered from
 * .src_not_aligned with src already rounded down and advanced past the
 * first word, that word in r5, and C set by the 'cmp ip, #2' that
 * compared equal -- hence the explicit clearing of C below.
 *
 * r4 carries the not yet written half of the last source word; each
 * output word is that half combined with half of the next source word
 * (pull #16 / push #16; the operators are defined outside this
 * fragment, presumably an endian-dependent lsr/lsl pair -- TODO
 * confirm).  16 bytes per loop pass, an optional 8, an optional 4,
 * then 0-3 trailing bytes.  Scratch: ip, r4-r8.
 */
225 .src2_aligned:  mov     r4, r5, pull #16
226                 adds    sum, sum, #0            @ adding 0 cannot carry, so this makes C = 0
227                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
228                 beq     2f
229 1:              load4l  r5, r6, r7, r8
230                 orr     r4, r4, r5, push #16    @ complete output word 0
231                 mov     r5, r5, pull #16
232                 orr     r5, r5, r6, push #16    @ output word 1
233                 mov     r6, r6, pull #16
234                 orr     r6, r6, r7, push #16    @ output word 2
235                 mov     r7, r7, pull #16
236                 orr     r7, r7, r8, push #16    @ output word 3
237                 stmia   dst!, {r4, r5, r6, r7}
238                 adcs    sum, sum, r4
239                 adcs    sum, sum, r5
240                 adcs    sum, sum, r6
241                 adcs    sum, sum, r7
242                 mov     r4, r8, pull #16        @ leftover for the next pass
243                 sub     ip, ip, #16
244                 teq     ip, #0
245                 bne     1b
246 2:              ands    ip, len, #12            @ an 8 and/or a 4 byte chunk left?
247                 beq     4f
248                 tst     ip, #8
249                 beq     3f
250                 load2l  r5, r6
251                 orr     r4, r4, r5, push #16
252                 mov     r5, r5, pull #16
253                 orr     r5, r5, r6, push #16
254                 stmia   dst!, {r4, r5}
255                 adcs    sum, sum, r4
256                 adcs    sum, sum, r5
257                 mov     r4, r6, pull #16
258                 tst     ip, #4
259                 beq     4f
260 3:              load1l  r5
261                 orr     r4, r4, r5, push #16
262                 str     r4, [dst], #4
263                 adcs    sum, sum, r4
264                 mov     r4, r5, pull #16
265 4:              ands    len, len, #3            @ 0-3 trailing bytes
266                 beq     .done
267                 mov     r5, r4, get_byte_0
268                 tst     len, #2
269                 beq     .exit                   @ exactly one byte left
270                 adcs    sum, sum, r4            @ the leftover half word is exactly the next two bytes
271                 strb    r5, [dst], #1
272                 mov     r5, r4, get_byte_1
273                 strb    r5, [dst], #1
274                 tst     len, #1
275                 beq     .done
276                 load1b  r5                      @ third byte is not in r4, fetch it from src
277                 b       .exit
278
/*
 * Source at byte offset 3 within its word.  Entered from
 * .src_not_aligned with src already rounded down and advanced past the
 * first word, that word in r5, and C set by the 'cmp ip, #2' (3 is
 * higher than 2) -- hence the explicit clearing of C below.
 *
 * r4 carries the single not yet written byte of the last source word;
 * each output word is that byte combined with three bytes of the next
 * source word (pull #24 / push #8; the operators are defined outside
 * this fragment, presumably an endian-dependent lsr/lsl pair -- TODO
 * confirm).  16 bytes per loop pass, an optional 8, an optional 4,
 * then 0-3 trailing bytes.  Scratch: ip, r4-r8.
 */
279 .src3_aligned:  mov     r4, r5, pull #24
280                 adds    sum, sum, #0            @ adding 0 cannot carry, so this makes C = 0
281                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
282                 beq     2f
283 1:              load4l  r5, r6, r7, r8
284                 orr     r4, r4, r5, push #8     @ complete output word 0
285                 mov     r5, r5, pull #24
286                 orr     r5, r5, r6, push #8     @ output word 1
287                 mov     r6, r6, pull #24
288                 orr     r6, r6, r7, push #8     @ output word 2
289                 mov     r7, r7, pull #24
290                 orr     r7, r7, r8, push #8     @ output word 3
291                 stmia   dst!, {r4, r5, r6, r7}
292                 adcs    sum, sum, r4
293                 adcs    sum, sum, r5
294                 adcs    sum, sum, r6
295                 adcs    sum, sum, r7
296                 mov     r4, r8, pull #24        @ leftover for the next pass
297                 sub     ip, ip, #16
298                 teq     ip, #0
299                 bne     1b
300 2:              ands    ip, len, #12            @ an 8 and/or a 4 byte chunk left?
301                 beq     4f
302                 tst     ip, #8
303                 beq     3f
304                 load2l  r5, r6
305                 orr     r4, r4, r5, push #8
306                 mov     r5, r5, pull #24
307                 orr     r5, r5, r6, push #8
308                 stmia   dst!, {r4, r5}
309                 adcs    sum, sum, r4
310                 adcs    sum, sum, r5
311                 mov     r4, r6, pull #24
312                 tst     ip, #4
313                 beq     4f
314 3:              load1l  r5
315                 orr     r4, r4, r5, push #8
316                 str     r4, [dst], #4
317                 adcs    sum, sum, r4
318                 mov     r4, r5, pull #24
319 4:              ands    len, len, #3            @ 0-3 trailing bytes
320                 beq     .done
321                 mov     r5, r4, get_byte_0      @ first trailing byte is the leftover in r4
322                 tst     len, #2
323                 beq     .exit                   @ exactly one byte left
324                 strb    r5, [dst], #1
325                 adcs    sum, sum, r4            @ r4 holds just that one byte
326                 load1l  r4                      @ the remaining byte(s) are in the next source word
327                 mov     r5, r4, get_byte_0
328                 strb    r5, [dst], #1
329                 adcs    sum, sum, r4, push #24  @ presumably sums only the byte just stored
330                 mov     r5, r4, get_byte_1      @ candidate third byte for .exit
331                 b       .exit