[PATCH] remove powerpc bitops in favor of existing generic bitops
[linux-2.6.git] / arch / powerpc / lib / copyuser_64.S
1 /*
2  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
11
12         .align  7               /* align the entry point to 2^7 = 128 bytes */
13 _GLOBAL(__copy_tofrom_user)     /* in: r3 = dest, r4 = src, r5 = byte count; out: r3 = 0, or #bytes not copied after a fault */
14         /* first check for a whole page copy on a page boundary */
15         cmpldi  cr1,r5,16       /* cr1 = count vs 16 (unsigned); tested by blt cr1 below */
16         cmpdi   cr6,r5,4096     /* cr6.eq <=> count == 4096 */
17         or      r0,r3,r4        /* merge both pointers so one mask tests both */
18         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
19         andi.   r0,r0,4095      /* cr0.eq <=> dest and src both have their low 12 bits clear */
20         std     r3,-24(r1)      /* stash original dest below the stack pointer for the fault fixups */
21         crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq <=> both 4096-aligned AND count == 4096 */
22         std     r4,-16(r1)      /* stash original src */
23         std     r5,-8(r1)       /* stash original count */
24         dcbt    0,r4            /* cache-touch hint for the start of the source */
25         beq     .Lcopy_page_4K  /* aligned whole page: take the unrolled page copy */
26         andi.   r6,r6,7         /* r6 = bytes to 8-byte dest boundary; cr0.eq <=> dest already aligned */
27         mtcrf   0x01,r5         /* cr7 = low 4 bits of count (8/4/2/1-byte flags); leaves cr0 intact */
28         blt     cr1,.Lshort_copy        /* fewer than 16 bytes */
29         bne     .Ldst_unaligned /* otherwise fall through into .Ldst_aligned */
30 .Ldst_aligned:                  /* dest is 8-byte aligned; r5 = bytes left; reads cr7 (low bits of r5) and cr1 (r5 vs 16) */
31         andi.   r0,r4,7         /* r0 = src offset within its doubleword */
32         addi    r3,r3,-16       /* bias dest: the stores below use 8(r3) and 16(r3) */
33         bne     .Lsrc_unaligned /* src not 8-byte aligned: shift-and-merge path */
34         srdi    r7,r5,4         /* r7 = number of 16-byte chunks */
35 20:     ld      r9,0(r4)        /* first source doubleword */
36         addi    r4,r4,-8        /* bias src to match the 8/16 offsets used in the loop */
37         mtctr   r7
38         andi.   r5,r5,7         /* r5 = trailing bytes (0-7); cr0.eq <=> none, tested by beq+ below */
39         bf      cr7*4+0,22f     /* 8s bit of the count clear: enter the loop at 22 */
40         addi    r3,r3,8
41         addi    r4,r4,8
42         mr      r8,r9
43         blt     cr1,72f         /* under 16 bytes left: store just this one doubleword */
44 21:     ld      r9,8(r4)        /* loop: two doublewords per pass, each load issued ahead of the store of the previous value */
45 70:     std     r8,8(r3)
46 22:     ldu     r8,16(r4)
47 71:     stdu    r9,16(r3)
48         bdnz    21b
49 72:     std     r8,8(r3)        /* last whole doubleword */
50         beq+    3f              /* no trailing bytes (cr0 from the andi. above): done */
51         addi    r3,r3,16        /* r3 -> next dest byte to write */
52 23:     ld      r9,8(r4)        /* doubleword that contains the trailing bytes */
53 .Ldo_tail:                      /* store up to 7 bytes taken from the high-order end of r9, selected by the cr7 bits */
54         bf      cr7*4+1,1f      /* 4s bit of the count */
55         rotldi  r9,r9,32        /* rotate the next 4 bytes into the low word for stw */
56 73:     stw     r9,0(r3)
57         addi    r3,r3,4
58 1:      bf      cr7*4+2,2f      /* 2s bit */
59         rotldi  r9,r9,16
60 74:     sth     r9,0(r3)
61         addi    r3,r3,2
62 2:      bf      cr7*4+3,3f      /* 1s bit */
63         rotldi  r9,r9,8
64 75:     stb     r9,0(r3)
65 3:      li      r3,0            /* success: zero bytes left uncopied */
66         blr
67
68 .Lsrc_unaligned:                /* dest aligned, src not: on entry r0 = src & 7, r3 = dest - 16, r5 = bytes left */
69         srdi    r6,r5,3         /* r6 = number of whole doublewords */
70         addi    r5,r5,-16
71         subf    r4,r0,r4        /* round src down to an 8-byte boundary */
72         srdi    r7,r5,4         /* r7 = loop count, (bytes - 16) / 16 */
73         sldi    r10,r0,3        /* r10 = left-shift amount in bits (8 * src offset) */
74         cmpldi  cr6,r6,3        /* cr6 = doubleword count vs 3 */
75         andi.   r5,r5,7         /* trailing byte count; cr0.eq <=> none, tested by bne 6f below */
76         mtctr   r7
77         subfic  r11,r10,64      /* r11 = 64 - r10, the complementary right-shift amount */
78         add     r5,r5,r0        /* r5 = trailing bytes + src offset, compared with 8 at 6: */
79         bt      cr7*4+0,28f     /* 8s bit of the count set: odd number of doublewords */
80
81 24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
82 25:     ld      r0,8(r4)
83         sld     r6,r9,r10
84 26:     ldu     r9,16(r4)
85         srd     r7,r0,r11
86         sld     r8,r0,r10
87         or      r7,r7,r6        /* r7 = first merged output doubleword */
88         blt     cr6,79f         /* fewer than 3 doublewords: go straight to the final stores */
89 27:     ld      r0,8(r4)
90         b       2f              /* enter the main loop at its midpoint */
91
92 28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
93 29:     ldu     r9,8(r4)
94         sld     r8,r0,r10
95         addi    r3,r3,-8        /* r3 = dest - 24, so the store at 80: lands on dest */
96         blt     cr6,5f          /* fewer than 3 doublewords: only the store at 80: is needed */
97 30:     ld      r0,8(r4)
98         srd     r12,r9,r11
99         sld     r6,r9,r10
100 31:     ldu     r9,16(r4)
101         or      r12,r8,r12
102         srd     r7,r0,r11
103         sld     r8,r0,r10
104         addi    r3,r3,16
105         beq     cr6,78f         /* exactly 3 doublewords: skip the loop */
106
107 1:      or      r7,r7,r6        /* main loop: two merged doublewords stored per pass */
108 32:     ld      r0,8(r4)
109 76:     std     r12,8(r3)
110 2:      srd     r12,r9,r11
111         sld     r6,r9,r10
112 33:     ldu     r9,16(r4)
113         or      r12,r8,r12
114 77:     stdu    r7,16(r3)
115         srd     r7,r0,r11
116         sld     r8,r0,r10
117         bdnz    1b
118
119 78:     std     r12,8(r3)       /* drain: store the doublewords still held in registers */
120         or      r7,r7,r6
121 79:     std     r7,16(r3)
122 5:      srd     r12,r9,r11
123         or      r12,r8,r12
124 80:     std     r12,24(r3)
125         bne     6f              /* trailing bytes remain (cr0 from the andi. above) */
126         li      r3,0            /* success: zero bytes left uncopied */
127         blr
128 6:      cmpwi   cr1,r5,8        /* do the trailing bytes extend past the doubleword already in r9? */
129         addi    r3,r3,32        /* r3 -> next dest byte to write */
130         sld     r9,r9,r10       /* left-justify the unused source bytes in r9 */
131         ble     cr1,.Ldo_tail   /* r9 already holds every remaining byte */
132 34:     ld      r0,8(r4)        /* otherwise one more source doubleword is needed */
133         srd     r7,r0,r11
134         or      r9,r7,r9
135         b       .Ldo_tail
136
137 .Ldst_unaligned:                /* copy r6 (1-7) leading bytes so that dest becomes 8-byte aligned */
138         mtcrf   0x01,r6         /* put #bytes to 8B bdry into cr7 */
139         subf    r5,r6,r5        /* r5 = bytes left after the alignment copy */
140         li      r7,0            /* r7 = bytes copied so far; index for the x-form accesses and added to r3 by the fault fixups */
141         cmpldi  cr1,r5,16       /* cr1 = remaining count vs 16 for .Ldst_aligned; the target is CR field cr1, not GPR r1 */
142         bf      cr7*4+3,1f      /* 1s bit: one byte */
143 35:     lbz     r0,0(r4)
144 81:     stb     r0,0(r3)
145         addi    r7,r7,1
146 1:      bf      cr7*4+2,2f      /* 2s bit: one halfword */
147 36:     lhzx    r0,r7,r4
148 82:     sthx    r0,r7,r3
149         addi    r7,r7,2
150 2:      bf      cr7*4+1,3f      /* 4s bit: one word */
151 37:     lwzx    r0,r7,r4
152 83:     stwx    r0,r7,r3
153 3:      mtcrf   0x01,r5         /* cr7 = low 4 bits of the remaining count */
154         add     r4,r6,r4        /* advance src past the bytes just copied */
155         add     r3,r6,r3        /* advance dest; it is now 8-byte aligned */
156         b       .Ldst_aligned
157
158 .Lshort_copy:                   /* fewer than 16 bytes: copy 8/4/2/1-byte pieces according to the cr7 bits of the count */
159         bf      cr7*4+0,1f      /* 8s bit */
160 38:     lwz     r0,0(r4)        /* the 8 bytes are moved as two words */
161 39:     lwz     r9,4(r4)
162         addi    r4,r4,8
163 84:     stw     r0,0(r3)
164 85:     stw     r9,4(r3)
165         addi    r3,r3,8
166 1:      bf      cr7*4+1,2f      /* 4s bit */
167 40:     lwz     r0,0(r4)
168         addi    r4,r4,4
169 86:     stw     r0,0(r3)
170         addi    r3,r3,4
171 2:      bf      cr7*4+2,3f      /* 2s bit */
172 41:     lhz     r0,0(r4)
173         addi    r4,r4,2
174 87:     sth     r0,0(r3)
175         addi    r3,r3,2
176 3:      bf      cr7*4+3,4f      /* 1s bit */
177 42:     lbz     r0,0(r4)
178 88:     stb     r0,0(r3)
179 4:      li      r3,0            /* success: zero bytes left uncopied */
180         blr
181
182 /*
183  * exception handlers follow
184  * we have to return the number of bytes not copied
185  * for an exception on a load, we set the rest of the destination to 0
186  */
187
188 136:
189 137:
190         add     r3,r3,r7        /* faults in .Ldst_unaligned: r7 = bytes already copied there */
191         b       1f
192 130:
193 131:
194         addi    r3,r3,8         /* the labels fall through, so 130/131 add 24 in total */
195 120:
196 122:
197 124:
198 125:
199 126:
200 127:
201 128:
202 129:
203 133:
204         addi    r3,r3,8         /* entries from 120 to 133 add 16 in total */
205 121:
206 132:
207         addi    r3,r3,8         /* 121/132 add 8 */
208 123:
209 134:
210 135:
211 138:
212 139:
213 140:
214 141:
215 142:                            /* entries from 123 to 142 need no adjustment */
216
217 /*
218  * here we have had a fault on a load and r3 points to the first
219  * unmodified byte of the destination
220  */
221 1:      ld      r6,-24(r1)      /* original dest saved at entry */
222         ld      r4,-16(r1)      /* original src */
223         ld      r5,-8(r1)       /* original count */
224         subf    r6,r6,r3        /* r6 = bytes already copied */
225         add     r4,r4,r6        /* r4 = src address to resume from */
226         subf    r5,r6,r5        /* #bytes left to go */
227
228 /*
229  * first see if we can copy any more bytes before hitting another exception
230  */
231         mtctr   r5
232 43:     lbz     r0,0(r4)        /* byte-at-a-time retry; a fault on this load is fixed up at 143 */
233         addi    r4,r4,1
234 89:     stb     r0,0(r3)        /* a fault on this store is fixed up at 189 */
235         addi    r3,r3,1
236         bdnz    43b
237         li      r3,0            /* huh? all copied successfully this time? */
238         blr
239
240 /*
241  * here we have trapped again, need to clear ctr bytes starting at r3
242  */
243 143:    mfctr   r5              /* r5 = bytes still uncopied (ctr of the retry loop) */
244         li      r0,0            /* fill value */
245         mr      r4,r3           /* r4 = fill pointer */
246         mr      r3,r5           /* return the number of bytes not copied */
247 1:      andi.   r9,r4,7         /* byte stores until r4 is 8-byte aligned */
248         beq     3f
249 90:     stb     r0,0(r4)
250         addic.  r5,r5,-1        /* record form: sets cr0 for the bne below */
251         addi    r4,r4,1
252         bne     1b
253         blr                     /* everything was cleared before reaching alignment */
254 3:      cmpldi  cr1,r5,8
255         srdi    r9,r5,3         /* r9 = whole doublewords to clear */
256         andi.   r5,r5,7         /* r5 = leftover bytes; cr0.eq <=> none */
257         blt     cr1,93f         /* fewer than 8 bytes: skip the doubleword loop */
258         mtctr   r9
259 91:     std     r0,0(r4)
260         addi    r4,r4,8
261         bdnz    91b
262 93:     beqlr                   /* no leftover bytes: return */
263         mtctr   r5      /* loop count = leftover bytes */
264 92:     stb     r0,0(r4)
265         addi    r4,r4,1
266         bdnz    92b
267         blr
268
269 /*
270  * exception handlers for stores: we just need to work
271  * out how many bytes weren't copied
272  */
273 182:
274 183:
275         add     r3,r3,r7        /* faults in .Ldst_unaligned: r7 = bytes already copied there */
276         b       1f
277 180:
278         addi    r3,r3,8         /* the labels fall through, so 180 adds 24 in total */
279 171:
280 177:
281 179:                            /* 79: std r7,16(r3) targets r3+16, so it belongs in the +16 group */
282         addi    r3,r3,8         /* 171/177/179 add 16 in total */
283 170:
284 172:
285 176:
286 178:
287         addi    r3,r3,4         /* 170/172/176/178 add 8 in total */
288 185:
289         addi    r3,r3,4         /* 185 adds 4 */
290 173:
291 174:
292 175:
293 181:
294 184:
295 186:
296 187:
297 188:
298 189:                            /* entries from 173 to 189 store at 0(r3): no adjustment */
299 1:                              /* r3 = address of the first byte that was not stored */
300         ld      r6,-24(r1)      /* original dest saved at entry */
301         ld      r5,-8(r1)       /* original count */
302         add     r6,r6,r5        /* r6 = one past the end of the destination */
303         subf    r3,r3,r6        /* #bytes not copied */
304 190:
305 191:
306 192:
307         blr                     /* #bytes not copied in r3 */
308
309         .section __ex_table,"a"        /* each .llong pair: a numbered load/store above, then the label of its fixup code */
310         .align  3               /* 2^3 = 8-byte alignment for the 64-bit entries */
311         .llong  20b,120b
312         .llong  21b,121b
313         .llong  70b,170b
314         .llong  22b,122b
315         .llong  71b,171b
316         .llong  72b,172b
317         .llong  23b,123b
318         .llong  73b,173b
319         .llong  74b,174b
320         .llong  75b,175b
321         .llong  24b,124b
322         .llong  25b,125b
323         .llong  26b,126b
324         .llong  27b,127b
325         .llong  28b,128b
326         .llong  29b,129b
327         .llong  30b,130b
328         .llong  31b,131b
329         .llong  32b,132b
330         .llong  76b,176b
331         .llong  33b,133b
332         .llong  77b,177b
333         .llong  78b,178b
334         .llong  79b,179b
335         .llong  80b,180b
336         .llong  34b,134b
337         .llong  35b,135b
338         .llong  81b,181b
339         .llong  36b,136b
340         .llong  82b,182b
341         .llong  37b,137b
342         .llong  83b,183b
343         .llong  38b,138b
344         .llong  39b,139b
345         .llong  84b,184b
346         .llong  85b,185b
347         .llong  40b,140b
348         .llong  86b,186b
349         .llong  41b,141b
350         .llong  87b,187b
351         .llong  42b,142b
352         .llong  88b,188b
353         .llong  43b,143b
354         .llong  89b,189b
355         .llong  90b,190b
356         .llong  91b,191b
357         .llong  92b,192b
358         
359         .text                   /* back to the code section */
360
361 /*
362  * Routine to copy a whole page of data, optimized for POWER4.
363  * On POWER4 it is more than 50% faster than the simple loop
364  * above (following the .Ldst_aligned label) but it runs slightly
365  * slower on POWER3.
366  */
367 .Lcopy_page_4K:                 /* reached only when dest and src are 4096-aligned and the count is 4096 */
368         std     r31,-32(1)      /* save r20-r31 below the stack pointer; reloaded at 9: and at fixup 100 */
369         std     r30,-40(1)
370         std     r29,-48(1)
371         std     r28,-56(1)
372         std     r27,-64(1)
373         std     r26,-72(1)
374         std     r25,-80(1)
375         std     r24,-88(1)
376         std     r23,-96(1)
377         std     r22,-104(1)
378         std     r21,-112(1)
379         std     r20,-120(1)
380         li      r5,4096/32 - 1  /* r5 = remaining 32-byte units, minus one */
381         addi    r3,r3,-8        /* bias dest: the stores below use offsets 8 and up */
382         li      r0,5            /* inner loop count for each outer pass */
383 0:      addi    r5,r5,-24       /* one outer pass moves 6 x 128 = 768 bytes = 24 units */
384         mtctr   r0
385 20:     ld      r22,640(4)      /* six streams 128 bytes apart: offsets 640, 512, 384, 256, 128, 0 */
386 21:     ld      r21,512(4)
387 22:     ld      r20,384(4)
388 23:     ld      r11,256(4)
389 24:     ld      r9,128(4)
390 25:     ld      r7,0(4)
391 26:     ld      r25,648(4)
392 27:     ld      r24,520(4)
393 28:     ld      r23,392(4)
394 29:     ld      r10,264(4)
395 30:     ld      r8,136(4)
396 31:     ldu     r6,8(4)
397         cmpwi   r5,24           /* sets cr0 for the bge 0b at the end of the pass; nothing in between writes cr0 */
398 1:                              /* inner loop: 3 doublewords per stream, r3 and r4 each advance by 24 */
399 32:     std     r22,648(3)
400 33:     std     r21,520(3)
401 34:     std     r20,392(3)
402 35:     std     r11,264(3)
403 36:     std     r9,136(3)
404 37:     std     r7,8(3)
405 38:     ld      r28,648(4)
406 39:     ld      r27,520(4)
407 40:     ld      r26,392(4)
408 41:     ld      r31,264(4)
409 42:     ld      r30,136(4)
410 43:     ld      r29,8(4)
411 44:     std     r25,656(3)
412 45:     std     r24,528(3)
413 46:     std     r23,400(3)
414 47:     std     r10,272(3)
415 48:     std     r8,144(3)
416 49:     std     r6,16(3)
417 50:     ld      r22,656(4)
418 51:     ld      r21,528(4)
419 52:     ld      r20,400(4)
420 53:     ld      r11,272(4)
421 54:     ld      r9,144(4)
422 55:     ld      r7,16(4)
423 56:     std     r28,664(3)
424 57:     std     r27,536(3)
425 58:     std     r26,408(3)
426 59:     std     r31,280(3)
427 60:     std     r30,152(3)
428 61:     stdu    r29,24(3)
429 62:     ld      r25,664(4)
430 63:     ld      r24,536(4)
431 64:     ld      r23,408(4)
432 65:     ld      r10,280(4)
433 66:     ld      r8,152(4)
434 67:     ldu     r6,24(4)
435         bdnz    1b
436 68:     std     r22,648(3)      /* store the last doubleword of each of the six streams */
437 69:     std     r21,520(3)
438 70:     std     r20,392(3)
439 71:     std     r11,264(3)
440 72:     std     r9,136(3)
441 73:     std     r7,8(3)
442 74:     addi    r4,r4,640       /* r4 moved 128 during the pass; +640 completes the 768-byte step */
443 75:     addi    r3,r3,648       /* r3 moved 120 during the pass; +648 completes the 768-byte step */
444         bge     0b              /* another pass while r5 was still >= 24 at the cmpwi above */
445         mtctr   r5              /* remainder: r5 loop passes of 32 bytes, then a final 32 bytes at 4: */
446 76:     ld      r7,0(4)
447 77:     ld      r8,8(4)
448 78:     ldu     r9,16(4)
449 3:
450 79:     ld      r10,8(4)
451 80:     std     r7,8(3)
452 81:     ld      r7,16(4)
453 82:     std     r8,16(3)
454 83:     ld      r8,24(4)
455 84:     std     r9,24(3)
456 85:     ldu     r9,32(4)
457 86:     stdu    r10,32(3)
458         bdnz    3b
459 4:
460 87:     ld      r10,8(4)
461 88:     std     r7,8(3)
462 89:     std     r8,16(3)
463 90:     std     r9,24(3)
464 91:     std     r10,32(3)
465 9:      ld      r20,-120(1)     /* reload the registers saved on entry */
466         ld      r21,-112(1)
467         ld      r22,-104(1)
468         ld      r23,-96(1)
469         ld      r24,-88(1)
470         ld      r25,-80(1)
471         ld      r26,-72(1)
472         ld      r27,-64(1)
473         ld      r28,-56(1)
474         ld      r29,-48(1)
475         ld      r30,-40(1)
476         ld      r31,-32(1)
477         li      r3,0            /* success: zero bytes left uncopied */
478         blr
479
480 /*
481  * on an exception, reset to the beginning and jump back into the
482  * standard __copy_tofrom_user
483  */
484 100:    ld      r20,-120(1)     /* reload the registers saved by the page-copy prologue */
485         ld      r21,-112(1)
486         ld      r22,-104(1)
487         ld      r23,-96(1)
488         ld      r24,-88(1)
489         ld      r25,-80(1)
490         ld      r26,-72(1)
491         ld      r27,-64(1)
492         ld      r28,-56(1)
493         ld      r29,-48(1)
494         ld      r30,-40(1)
495         ld      r31,-32(1)
496         ld      r3,-24(r1)      /* original dest saved at entry */
497         ld      r4,-16(r1)      /* original src saved at entry */
498         li      r5,4096         /* this path is only taken for a 4096-byte count */
499         mtcrf   0x01,r5         /* cr7 = low 4 bits of count: .Ldst_aligned tests cr7, which the page-copy path never set */
500         b       .Ldst_aligned   /* redo the whole copy with the generic loop and its precise fixups */
501         .section __ex_table,"a"        /* every numbered instruction of the page copy shares the single fixup at 100 */
502         .align  3               /* 2^3 = 8-byte alignment for the 64-bit entries */
503         .llong  20b,100b
504         .llong  21b,100b
505         .llong  22b,100b
506         .llong  23b,100b
507         .llong  24b,100b
508         .llong  25b,100b
509         .llong  26b,100b
510         .llong  27b,100b
511         .llong  28b,100b
512         .llong  29b,100b
513         .llong  30b,100b
514         .llong  31b,100b
515         .llong  32b,100b
516         .llong  33b,100b
517         .llong  34b,100b
518         .llong  35b,100b
519         .llong  36b,100b
520         .llong  37b,100b
521         .llong  38b,100b
522         .llong  39b,100b
523         .llong  40b,100b
524         .llong  41b,100b
525         .llong  42b,100b
526         .llong  43b,100b
527         .llong  44b,100b
528         .llong  45b,100b
529         .llong  46b,100b
530         .llong  47b,100b
531         .llong  48b,100b
532         .llong  49b,100b
533         .llong  50b,100b
534         .llong  51b,100b
535         .llong  52b,100b
536         .llong  53b,100b
537         .llong  54b,100b
538         .llong  55b,100b
539         .llong  56b,100b
540         .llong  57b,100b
541         .llong  58b,100b
542         .llong  59b,100b
543         .llong  60b,100b
544         .llong  61b,100b
545         .llong  62b,100b
546         .llong  63b,100b
547         .llong  64b,100b
548         .llong  65b,100b
549         .llong  66b,100b
550         .llong  67b,100b
551         .llong  68b,100b
552         .llong  69b,100b
553         .llong  70b,100b
554         .llong  71b,100b
555         .llong  72b,100b
556         .llong  73b,100b
557         .llong  74b,100b
558         .llong  75b,100b
559         .llong  76b,100b
560         .llong  77b,100b
561         .llong  78b,100b
562         .llong  79b,100b
563         .llong  80b,100b
564         .llong  81b,100b
565         .llong  82b,100b
566         .llong  83b,100b
567         .llong  84b,100b
568         .llong  85b,100b
569         .llong  86b,100b
570         .llong  87b,100b
571         .llong  88b,100b
572         .llong  89b,100b
573         .llong  90b,100b
574         .llong  91b,100b
574         .llong  91b,100b