Linux-2.6.12-rc2
[linux-3.10.git] / arch / arm26 / lib / csumpartialcopygeneric.S
1 /*
2  *  linux/arch/arm26/lib/csumpartialcopygeneric.S
3  *
4  *  Copyright (C) 1995-2001 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * JMA 01/06/03 Commented out some shl0s; probably irrelevant to arm26
11  *
12  */
13
14 /*
15  * unsigned int
16  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
17  *  r0 = src, r1 = dst, r2 = len, r3 = sum
18  *  Returns : r0 = checksum
19  *
20  * Note that 'tst' and 'teq' preserve the carry flag.
21  */
22
23 /* Quick hack: local save_regs.  r1 (dst) is part of the list so that .done can reload the original dst from [sp, #0]. */
24                 .macro  save_regs
25                 stmfd   sp!, {r1, r4 - r8, fp, ip, lr, pc}    /* r1 is the lowest-numbered register, so it ends up at the new sp */
26                 .endm
27
28 /* end Quick Hack */
29
30 src     .req    r0              /* source pointer on entry; r0 also carries the returned checksum */
31 dst     .req    r1              /* destination pointer */
32 len     .req    r2              /* byte count */
33 sum     .req    r3              /* running 32-bit checksum accumulator */
34
35 .zero:          mov     r0, sum                 /* reached from .less8 when len == 0: return the incoming sum unchanged */
36                 load_regs       ea              /* load_regs is not defined in this file; presumably it unwinds the save_regs frame and returns - confirm */
37
38                 /*
39                  * .dst_unaligned: called with bl when dst is not 32-bit
40                  * aligned.  Copies and sums 1 to 3 bytes so that dst ends
41                  * up word aligned.  The caller has checked len >= 8, so len
42                  * is not tested here.  src has not been aligned yet.
43                  */
44 .dst_unaligned: tst     dst, #1                 /* is dst on an odd address? */
45                 beq     .dst_16bit              /* no: dst is 2 mod 4, exactly two bytes to go */
46
47                 load1b  ip                      /* load1b and load2b are not defined in this file */
48                 sub     len, len, #1            /* no S suffix: C must survive for the next adcs */
49                 adcs    sum, sum, ip, lsl #byte(1)      @ update checksum
50                 strb    ip, [dst], #1
51                 tst     dst, #2                 /* tst leaves C alone */
52                 moveq   pc, lr                  @ dst is now 32bit aligned
53
54 .dst_16bit:     load2b  r8, ip                  /* presumably two source bytes, first in r8, second in ip - macro not shown */
55                 sub     len, len, #2
56                 adcs    sum, sum, r8, lsl #byte(0)
57                 strb    r8, [dst], #1           /* r8 is stored first, then ip */
58                 adcs    sum, sum, ip, lsl #byte(1)
59                 strb    ip, [dst], #1
60                 mov     pc, lr                  @ dst is now 32bit aligned
61
62                 /*
63                  * .less8: copy and sum 0 to 7 bytes, one or two at a time, for any
64                  * src/dst alignment.  Entered via blo after cmp len, #8, so C = 0.
65                  */
66 .less8:         teq     len, #0                 @ check for zero count
67                 beq     .zero
68
69                 /* we must have at least one byte. */
70                 tst     dst, #1                 @ dst 16-bit aligned
71                 beq     .less8_aligned
72
73                 /* Align dst */
74                 load1b  ip
75                 sub     len, len, #1
76                 adcs    sum, sum, ip, lsl #byte(1)      @ update checksum
77                 strb    ip, [dst], #1
78                 tst     len, #6                 /* at least one byte pair left (len >= 2)? */
79                 beq     .less8_byteonly
80
81 1:              load2b  r8, ip                  /* pair loop: two bytes per pass */
82                 sub     len, len, #2
83                 adcs    sum, sum, r8, lsl #byte(0)
84                 strb    r8, [dst], #1
85                 adcs    sum, sum, ip, lsl #byte(1)
86                 strb    ip, [dst], #1
87 .less8_aligned: tst     len, #6                 /* loop while len >= 2; tst keeps C intact */
88                 bne     1b
89 .less8_byteonly:
90                 tst     len, #1                 /* one trailing byte? */
91                 beq     .done
92                 load1b  r8
93                 adcs    sum, sum, r8, lsl #byte(0)      @ update checksum
94                 strb    r8, [dst], #1
95                 b       .done                   /* .done folds the last carry into r0 and returns */
96
97 FN_ENTRY        /* entry label macro, not defined in this file; r0 = src, r1 = dst, r2 = len, r3 = sum, returns r0 = checksum */
98                 mov     ip, sp                  /* remember the entry sp for the frame pointer */
99                 save_regs                       /* pushes dst (r1) at [sp, #0]; .done reloads it from there */
100                 sub     fp, ip, #4              /* fp = entry sp - 4, the slot where save_regs stored pc */
101
102                 cmp     len, #8                 @ Ensure that we have at least
103                 blo     .less8                  @ 8 bytes to copy.
104
105                 adds    sum, sum, #0            @ C = 0
106                 tst     dst, #3                 @ Test destination alignment
107                 blne    .dst_unaligned          @ align destination, return here
108
109                 /*
110                  * Ok, the dst pointer is now 32bit aligned, and we know
111                  * that we must have more than 4 bytes to copy.  Note
112                  * that C contains the carry from the dst alignment above.
113                  */
114
115                 tst     src, #3                 @ Test source alignment
116                 bne     .src_not_aligned
117
118                 /* Routine for src & dst aligned */
119
120                 bics    ip, len, #15            /* ip = len rounded down to a multiple of 16 */
121                 beq     2f                      /* fewer than 16 bytes: skip the bulk loop */
122
123 1:              load4l  r4, r5, r6, r7          /* bulk loop: 16 bytes per pass (load4l is not defined in this file) */
124                 stmia   dst!, {r4, r5, r6, r7}
125                 adcs    sum, sum, r4            /* the carry chains from one adcs to the next */
126                 adcs    sum, sum, r5
127                 adcs    sum, sum, r6
128                 adcs    sum, sum, r7
129                 sub     ip, ip, #16             /* sub and teq are used so that C is preserved */
130                 teq     ip, #0
131                 bne     1b
132
133 2:              ands    ip, len, #12            /* ip = number of bytes in the remaining whole words */
134                 beq     4f
135                 tst     ip, #8
136                 beq     3f
137                 load2l  r4, r5                  /* two whole words */
138                 stmia   dst!, {r4, r5}
139                 adcs    sum, sum, r4
140                 adcs    sum, sum, r5
141                 tst     ip, #4
142                 beq     4f
143
144 3:              load1l  r4                      /* one whole word */
145                 str     r4, [dst], #4
146                 adcs    sum, sum, r4
147
148 4:              ands    len, len, #3            /* 1 to 3 tail bytes left? */
149                 beq     .done
150                 load1l  r4                      /* word holding the tail bytes */
151                 tst     len, #2
152                 /* r5 is the register every strb below stores from, so it has to be */
153                 /* seeded from r4 here.  The shift is zero; the move is still needed. */
154                 mov     r5, r4                  /* r5 = tail word, byte 0 in bits 7:0 (mov leaves the flags alone) */
155                 beq     .exit                   /* only one tail byte: .exit stores and sums it */
156                 adcs    sum, sum, r4, push #16  /* add the two low tail bytes */
157                 strb    r5, [dst], #1           /* store byte 0 */
158                 mov     r5, r4, lsr #byte(1)
159                 strb    r5, [dst], #1           /* store byte 1 */
160                 mov     r5, r4, lsr #byte(2)    /* r5 = byte 2, used only if len is odd */
161 .exit:          tst     len, #1                 /* shared tail exit: r5 holds the final byte, if there is one */
162                 strneb  r5, [dst], #1
163                 andne   r5, r5, #255            /* keep just that byte for the sum */
164                 adcnes  sum, sum, r5, lsl #byte(0)
165
166                 /*
167                  * .done: common exit.  Fold the outstanding carry into
168                  * the result.  If the original dst pointer (saved at
169                  * [sp, #0] by save_regs) was odd, rotate the checksum
170                  * left by 8 bits, as the generic checksum code expects.
171                  */
172 .done:          adc     r0, sum, #0             /* r0 = sum + C */
173                 ldr     sum, [sp, #0]           @ dst
174                 tst     sum, #1                 /* was the original dst odd? */
175                 movne   sum, r0, lsl #8         /* if so: rotate r0 left by 8 ... */
176                 orrne   r0, sum, r0, lsr #24    /* ... r0 = (r0 << 8) | (r0 >> 24) */
177                 load_regs       ea              /* not defined in this file; presumably restores the saved frame and returns - confirm */
178
179 .src_not_aligned:                               /* dst is word aligned, src is not; pull and push are shift names defined outside this file */
180                 adc     sum, sum, #0            @ include C from dst alignment
181                 and     ip, src, #3             /* ip = byte offset of src inside its word (1, 2 or 3) */
182                 bic     src, src, #3            /* round src down to a word boundary */
183                 load1l  r5                      /* presumably loads the word at src and advances src - macro not shown */
184                 cmp     ip, #2
185                 beq     .src2_aligned           /* offset 2 */
186                 bhi     .src3_aligned           /* offset 3 */
187                 mov     r4, r5, pull #8         @ C = 0
188                 bics    ip, len, #15            /* offset 1: r4 carries the three bytes not yet consumed */
189                 beq     2f
190 1:              load4l  r5, r6, r7, r8          /* bulk loop: build four output words from r4 and four new words */
191                 orr     r4, r4, r5, push #24
192                 mov     r5, r5, pull #8
193                 orr     r5, r5, r6, push #24
194                 mov     r6, r6, pull #8
195                 orr     r6, r6, r7, push #24
196                 mov     r7, r7, pull #8
197                 orr     r7, r7, r8, push #24
198                 stmia   dst!, {r4, r5, r6, r7}
199                 adcs    sum, sum, r4
200                 adcs    sum, sum, r5
201                 adcs    sum, sum, r6
202                 adcs    sum, sum, r7
203                 mov     r4, r8, pull #8         /* leftover bytes of the last word, for the next pass */
204                 sub     ip, ip, #16             /* sub and teq keep C intact */
205                 teq     ip, #0
206                 bne     1b
207 2:              ands    ip, len, #12            /* remaining whole words */
208                 beq     4f
209                 tst     ip, #8
210                 beq     3f
211                 load2l  r5, r6                  /* two words */
212                 orr     r4, r4, r5, push #24
213                 mov     r5, r5, pull #8
214                 orr     r5, r5, r6, push #24
215                 stmia   dst!, {r4, r5}
216                 adcs    sum, sum, r4
217                 adcs    sum, sum, r5
218                 mov     r4, r6, pull #8
219                 tst     ip, #4
220                 beq     4f
221 3:              load1l  r5                      /* one word */
222                 orr     r4, r4, r5, push #24
223                 str     r4, [dst], #4
224                 adcs    sum, sum, r4
225                 mov     r4, r5, pull #8
226 4:              ands    len, len, #3            /* 1 to 3 tail bytes, all already in r4 */
227                 beq     .done
228                 /* r5 still holds a shifted word from the code above, not the tail bytes. */
229                 /* Every strb below, and the one at .exit, stores from r5: seed it from r4. */
230                 mov     r5, r4                  /* r5 = leftover bytes, byte 0 in bits 7:0 (flags untouched) */
231                 tst     len, #2
232                 beq     .exit                   /* single tail byte: .exit stores and sums it */
233                 adcs    sum, sum, r4, push #16  /* add the two low tail bytes */
234                 strb    r5, [dst], #1           /* store byte 0 */
235                 mov     r5, r4, lsr #byte(1)
236                 strb    r5, [dst], #1           /* store byte 1 */
237                 mov     r5, r4, lsr #byte(2)    /* r5 = byte 2, used by .exit if len is odd */
238                 b       .exit
239
240 .src2_aligned:  mov     r4, r5, pull #16        /* src offset 2: r4 carries the two bytes not yet consumed */
241                 adds    sum, sum, #0            /* clear C, which the cmp ip, #2 on the way here left set */
242                 bics    ip, len, #15            /* ip = len rounded down to a multiple of 16 */
243                 beq     2f
244 1:              load4l  r5, r6, r7, r8          /* bulk loop: build four output words from r4 and four new words */
245                 orr     r4, r4, r5, push #16
246                 mov     r5, r5, pull #16
247                 orr     r5, r5, r6, push #16
248                 mov     r6, r6, pull #16
249                 orr     r6, r6, r7, push #16
250                 mov     r7, r7, pull #16
251                 orr     r7, r7, r8, push #16
252                 stmia   dst!, {r4, r5, r6, r7}
253                 adcs    sum, sum, r4
254                 adcs    sum, sum, r5
255                 adcs    sum, sum, r6
256                 adcs    sum, sum, r7
257                 mov     r4, r8, pull #16        /* leftover half of the last word, for the next pass */
258                 sub     ip, ip, #16             /* sub and teq keep C intact */
259                 teq     ip, #0
260                 bne     1b
261 2:              ands    ip, len, #12            /* remaining whole words */
262                 beq     4f
263                 tst     ip, #8
264                 beq     3f
265                 load2l  r5, r6                  /* two words */
266                 orr     r4, r4, r5, push #16
267                 mov     r5, r5, pull #16
268                 orr     r5, r5, r6, push #16
269                 stmia   dst!, {r4, r5}
270                 adcs    sum, sum, r4
271                 adcs    sum, sum, r5
272                 mov     r4, r6, pull #16
273                 tst     ip, #4
274                 beq     4f
275 3:              load1l  r5                      /* one word */
276                 orr     r4, r4, r5, push #16
277                 str     r4, [dst], #4
278                 adcs    sum, sum, r4
279                 mov     r4, r5, pull #16
280 4:              ands    len, len, #3            /* 1 to 3 tail bytes; r4 holds the next two */
281                 beq     .done
282                 /* r5 still holds a shifted word from the code above, not the tail bytes. */
283                 /* Every strb below, and the one at .exit, stores from r5: seed it from r4. */
284                 mov     r5, r4                  /* r5 = leftover bytes, byte 0 in bits 7:0 (flags untouched) */
285                 tst     len, #2
286                 beq     .exit                   /* single tail byte: .exit stores and sums it */
287                 adcs    sum, sum, r4            /* add both leftover bytes */
288                 strb    r5, [dst], #1           /* store byte 0 */
289                 mov     r5, r4, lsr #byte(1)
290                 strb    r5, [dst], #1           /* store byte 1 */
291                 tst     len, #1                 /* a third tail byte? */
292                 beq     .done
293                 load1b  r5                      /* it is not in r4, so fetch it from src */
294                 b       .exit
295
296 .src3_aligned:  mov     r4, r5, pull #24        /* src offset 3: r4 carries the single byte not yet consumed */
297                 adds    sum, sum, #0            /* clear C, which the cmp ip, #2 on the way here left set */
298                 bics    ip, len, #15            /* ip = len rounded down to a multiple of 16 */
299                 beq     2f
300 1:              load4l  r5, r6, r7, r8          /* bulk loop: build four output words from r4 and four new words */
301                 orr     r4, r4, r5, push #8
302                 mov     r5, r5, pull #24
303                 orr     r5, r5, r6, push #8
304                 mov     r6, r6, pull #24
305                 orr     r6, r6, r7, push #8
306                 mov     r7, r7, pull #24
307                 orr     r7, r7, r8, push #8
308                 stmia   dst!, {r4, r5, r6, r7}
309                 adcs    sum, sum, r4
310                 adcs    sum, sum, r5
311                 adcs    sum, sum, r6
312                 adcs    sum, sum, r7
313                 mov     r4, r8, pull #24        /* leftover byte of the last word, for the next pass */
314                 sub     ip, ip, #16             /* sub and teq keep C intact */
315                 teq     ip, #0
316                 bne     1b
317 2:              ands    ip, len, #12            /* remaining whole words */
318                 beq     4f
319                 tst     ip, #8
320                 beq     3f
321                 load2l  r5, r6                  /* two words */
322                 orr     r4, r4, r5, push #8
323                 mov     r5, r5, pull #24
324                 orr     r5, r5, r6, push #8
325                 stmia   dst!, {r4, r5}
326                 adcs    sum, sum, r4
327                 adcs    sum, sum, r5
328                 mov     r4, r6, pull #24
329                 tst     ip, #4
330                 beq     4f
331 3:              load1l  r5                      /* one word */
332                 orr     r4, r4, r5, push #8
333                 str     r4, [dst], #4
334                 adcs    sum, sum, r4
335                 mov     r4, r5, pull #24
336 4:              ands    len, len, #3            /* 1 to 3 tail bytes; r4 holds only the next one */
337                 beq     .done
338                 /* r5 still holds a shifted word from the code above, not the tail byte. */
339                 /* The strb below, and the one at .exit, store from r5: seed it from r4. */
340                 mov     r5, r4                  /* r5 = leftover byte in bits 7:0 (flags untouched) */
341                 tst     len, #2
342                 beq     .exit                   /* single tail byte: .exit stores and sums it */
343                 strb    r5, [dst], #1           /* store the leftover byte */
344                 adcs    sum, sum, r4
345                 load1l  r4                      /* the other tail bytes live in the next source word */
346                 /* r5 has to follow the freshly loaded word as well, otherwise the */
347                 /* next strb would write the previous byte a second time. */
348                 mov     r5, r4                  /* r5 = new word, byte 0 in bits 7:0 */
349                 strb    r5, [dst], #1           /* store byte 0 of the new word */
350                 adcs    sum, sum, r4, push #24  /* add that byte to the sum */
351                 mov     r5, r4, lsr #byte(1)    /* r5 = byte 1, used by .exit if len is odd */
352                 b       .exit