2 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Neil A. Carson and Mark Brinicombe
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the NetBSD
19 * Foundation, Inc. and its contributors.
20 * 4. Neither the name of The NetBSD Foundation nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
36 * Adapted for uClibc from NetBSD _memcpy.S,v 1.6 2003/10/09
37 * by Erik Andersen <andersen@codepoet.org>
42 #include <bits/arm_asm.h>
44 #if !defined(THUMB1_ONLY)
46 * This is one fun bit of code ...
47 * Some easy listening music is suggested while trying to understand this
48 * code e.g. Iron Maiden
50 * For anyone attempting to understand it :
52 * The core code is implemented here with simple stubs for memcpy()
53 * memmove() and bcopy().
55 * All local labels are prefixed with Lmemcpy_
56 * Following the prefix a label starting f is used in the forward copy code
57 * while a label using b is used in the backwards copy code
58 * The source and destination addresses determine whether a forward or
59 * backward copy is performed.
60 * Separate bits of code are used to deal with the following situations
61 * for both the forward and backwards copy.
62 * unaligned source address
63 * unaligned destination address
64 * Separate copy routines are used to produce an optimised result for each
66 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
67 * a time where possible.
69 * Note: r12 (aka ip) can be trashed during the function along with
70 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
71 * Additional registers are preserved prior to use i.e. r4, r5 & lr
72 * The return value in r0 must be the destination address.
74 * Apologies for the state of the comments ;-)
80 .type _memcpy,%function

/* NOTE(review): this file is an incomplete extraction of the uClibc/NetBSD
 * ARM _memcpy.S.  The leading number on every line is the original source
 * line number, and the gaps in that numbering show that many instructions,
 * labels, and matching #else/#endif lines are missing from this view.
 * Comments below describe only the code that is visible; this fragment is
 * not expected to assemble as-is. */

/* _memcpy core entry.  Per the header comment block: r0 = dest, r1 = src,
 * r2 = len; r0 (dest) must be returned; r3 and r12 (ip) are scratch and
 * r4/r5/lr are preserved before use. */

83 /* XXX: The Thumb-2 conditionals can be removed if/when we require an
84 assembler that supports unified syntax. */
86 #if defined(__thumb2__)
97 #if defined(__thumb2__)
108 /* Determine copy direction */
/* The compare feeding this bcc is elided (orig. line 109); per the header
 * comment, the src/dest addresses decide forward vs. backward copy. */
110 bcc .Lmemcpy_backwards
112 IT(t, eq) /* Quick abort for src=dst */
113 #if defined(__USE_BX__)
/* Forward-copy prologue: dest is saved so it can be popped straight into
 * r0 (and lr into pc) on return. */
118 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
120 blt .Lmemcpy_fl4 /* less than 4 bytes */
122 bne .Lmemcpy_fdestul /* oh unaligned destination addr */
124 bne .Lmemcpy_fsrcul /* oh unaligned source addr */
127 /* We have aligned source and destination */
129 blt .Lmemcpy_fl12 /* less than 12 bytes (4 from above) */
131 blt .Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
132 str r4, [sp, #-4]! /* borrow r4 */
134 /* blat 32 bytes at a time */
135 /* XXX for really big copies perhaps we should use more registers */
/* Main aligned loop body: two LDM/STM pairs move 4 words (16 bytes) each,
 * 32 bytes per iteration.  Loop branch is elided in this extraction. */
137 ldmia r1!, {r3, r4, r12, lr}
138 stmia r0!, {r3, r4, r12, lr}
139 ldmia r1!, {r3, r4, r12, lr}
140 stmia r0!, {r3, r4, r12, lr}
145 /* blat a remaining 16 bytes */
/* `copy' is a macro; its definition lies in an elided part of the file. */
146 copy "{r3, r4, r12, lr}"
148 ldr r4, [sp], #4 /* restore r4 */
153 /* blat 12 bytes at a time */
156 #if defined(__thumb2__)
175 /* less than 4 bytes to go */
177 #if defined(__thumb2__)
/* Conditional (eq: nothing left) epilogues: pop the saved dest back into
 * r0 and return.  The ARMv4T variant pops the return address into r3
 * instead of pc -- presumably followed by a bx r3 in an elided line for
 * interworking-safe return; TODO confirm against the full source. */
179 popeq {r0, pc} /* done */
180 #elif defined(__ARM_ARCH_4T__)
181 ldmeqia sp!, {r0, r3} /* done */
184 ldmeqia sp!, {r0, pc} /* done */
187 /* copy the crud byte at a time */
191 #if defined(__thumb2__)
204 #if defined(__ARM_ARCH_4T__)
211 /* erg - unaligned destination */
216 /* align destination with byte copies */
219 #if defined(__thumb2__)
233 blt .Lmemcpy_fl4 /* less than 4 bytes */
236 beq .Lmemcpy_ft8 /* we have an aligned source */
238 /* erg - unaligned source */
239 /* This is where it gets nasty ... */
/* Dispatch on src & 3 (the computation is elided); fsrcul1/2/3 handle a
 * source 1/2/3 bytes past a word boundary respectively. */
247 blt .Lmemcpy_fsrcul1loop4
/* Forward copy with source 1 byte past a word boundary (fsrcul1):
 * each stored word is assembled from two adjacent source words using
 * shift/ORR pairs (24/8-bit splits here), with the shift direction
 * selected by __BYTE_ORDER.  In this extraction the #else/#endif lines
 * of each byte-order conditional and the ldr/mov instructions that prime
 * r3 between the ORRs are elided -- both endian variants appear to run
 * together below. */
251 .Lmemcpy_fsrcul1loop16:
252 #if __BYTE_ORDER == __BIG_ENDIAN
254 ldmia r1!, {r4, r5, r12, lr}
255 orr r3, r3, r4, lsr #24
257 orr r4, r4, r5, lsr #24
259 orr r5, r5, r12, lsr #24
261 orr r12, r12, lr, lsr #24
/* little-endian variant (the separating #else is elided) */
264 ldmia r1!, {r4, r5, r12, lr}
265 orr r3, r3, r4, lsl #24
267 orr r4, r4, r5, lsl #24
269 orr r5, r5, r12, lsl #24
271 orr r12, r12, lr, lsl #24
273 stmia r0!, {r3-r5, r12}
275 bge .Lmemcpy_fsrcul1loop16
278 blt .Lmemcpy_fsrcul1l4
/* 4-bytes-at-a-time tail for the 1-byte-offset case */
280 .Lmemcpy_fsrcul1loop4:
281 #if __BYTE_ORDER == __BIG_ENDIAN
284 orr r12, r12, lr, lsr #24
288 orr r12, r12, lr, lsl #24
292 bge .Lmemcpy_fsrcul1loop4
/* fsrcul2: source 2 bytes past a word boundary -- 16/16-bit splits */
300 blt .Lmemcpy_fsrcul2loop4
304 .Lmemcpy_fsrcul2loop16:
305 #if __BYTE_ORDER == __BIG_ENDIAN
307 ldmia r1!, {r4, r5, r12, lr}
308 orr r3, r3, r4, lsr #16
310 orr r4, r4, r5, lsr #16
312 orr r5, r5, r12, lsr #16
313 mov r12, r12, lsl #16
314 orr r12, r12, lr, lsr #16
/* little-endian variant (the separating #else is elided) */
317 ldmia r1!, {r4, r5, r12, lr}
318 orr r3, r3, r4, lsl #16
320 orr r4, r4, r5, lsl #16
322 orr r5, r5, r12, lsl #16
323 mov r12, r12, lsr #16
324 orr r12, r12, lr, lsl #16
326 stmia r0!, {r3-r5, r12}
328 bge .Lmemcpy_fsrcul2loop16
331 blt .Lmemcpy_fsrcul2l4
333 .Lmemcpy_fsrcul2loop4:
334 #if __BYTE_ORDER == __BIG_ENDIAN
337 orr r12, r12, lr, lsr #16
341 orr r12, r12, lr, lsl #16
345 bge .Lmemcpy_fsrcul2loop4
/* fsrcul3: source 3 bytes past a word boundary -- 8/24-bit splits */
353 blt .Lmemcpy_fsrcul3loop4
357 .Lmemcpy_fsrcul3loop16:
358 #if __BYTE_ORDER == __BIG_ENDIAN
360 ldmia r1!, {r4, r5, r12, lr}
361 orr r3, r3, r4, lsr #8
363 orr r4, r4, r5, lsr #8
365 orr r5, r5, r12, lsr #8
366 mov r12, r12, lsl #24
367 orr r12, r12, lr, lsr #8
/* little-endian variant (the separating #else is elided) */
370 ldmia r1!, {r4, r5, r12, lr}
371 orr r3, r3, r4, lsl #8
373 orr r4, r4, r5, lsl #8
375 orr r5, r5, r12, lsl #8
376 mov r12, r12, lsr #24
377 orr r12, r12, lr, lsl #8
379 stmia r0!, {r3-r5, r12}
381 bge .Lmemcpy_fsrcul3loop16
384 blt .Lmemcpy_fsrcul3l4
386 .Lmemcpy_fsrcul3loop4:
387 #if __BYTE_ORDER == __BIG_ENDIAN
390 orr r12, r12, lr, lsr #8
394 orr r12, r12, lr, lsl #8
398 bge .Lmemcpy_fsrcul3loop4
/* Backwards copy (dest overlaps src from above): both pointers walk down
 * from the end of the buffers using LDMDB/STMDB. */
408 blt .Lmemcpy_bl4 /* less than 4 bytes */
410 bne .Lmemcpy_bdestul /* oh unaligned destination addr */
412 bne .Lmemcpy_bsrcul /* oh unaligned source addr */
415 /* We have aligned source and destination */
417 blt .Lmemcpy_bl12 /* less than 12 bytes (4 from above) */
419 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
422 /* blat 32 bytes at a time */
423 /* XXX for really big copies perhaps we should use more registers */
/* Main aligned backwards loop body: 4 words per LDMDB/STMDB pair,
 * 32 bytes per iteration; the loop branch is elided here. */
425 ldmdb r1!, {r3, r4, r12, lr}
426 stmdb r0!, {r3, r4, r12, lr}
427 ldmdb r1!, {r3, r4, r12, lr}
428 stmdb r0!, {r3, r4, r12, lr}
434 /* blat a remaining 16 bytes */
/* `copydb' is a macro; its definition lies in an elided part of the file. */
435 copydb "{r3, r4, r12, lr}"
438 /* blat a remaining 12 bytes */
439 copydb "{r3, r12, lr}"
454 /* less than 4 bytes to go */
457 #if defined(__USE_BX__)
/* Non-BX return path (the bxeq alternative is elided above). */
460 moveq pc, lr /* done */
462 /* copy the crud byte at a time */
/* NOTE(review): the next eight lines contain both the UAL (ldrbge/strbge)
 * and pre-UAL (ldrgeb/strgeb) spellings of the same conditional byte
 * copies; the preprocessor conditional selecting between them appears to
 * be elided in this extraction -- confirm against the full source. */
468 ldrbge r3, [r1, #-1]!
469 strbge r3, [r0, #-1]!
471 ldrbgt r3, [r1, #-1]!
472 strbgt r3, [r0, #-1]!
474 ldrgeb r3, [r1, #-1]!
475 strgeb r3, [r0, #-1]!
476 ldrgtb r3, [r1, #-1]!
477 strgtb r3, [r0, #-1]!
479 #if defined(__USE_BX__)
484 /* erg - unaligned destination */
488 /* align destination with byte copies */
/* Same UAL/pre-UAL duplication as above -- selecting conditional elided. */
493 ldrbge r3, [r1, #-1]!
494 strbge r3, [r0, #-1]!
496 ldrbgt r3, [r1, #-1]!
497 strbgt r3, [r0, #-1]!
499 ldrgeb r3, [r1, #-1]!
500 strgeb r3, [r0, #-1]!
501 ldrgtb r3, [r1, #-1]!
502 strgtb r3, [r0, #-1]!
505 blt .Lmemcpy_bl4 /* less than 4 bytes to go */
507 beq .Lmemcpy_bt8 /* we have an aligned source */
509 /* erg - unaligned source */
510 /* This is where it gets nasty ... */
/* Backwards copy with unaligned source: mirror of the forward fsrculN
 * loops.  bsrcul3/2/1 handle a source 3/2/1 bytes past a word boundary;
 * words are loaded descending (LDMDB) and each stored word is merged from
 * two adjacent source words with shift/ORR pairs.  As in the forward
 * section, the #else/#endif lines of the byte-order conditionals and the
 * priming loads between the ORRs are elided in this extraction. */
518 blt .Lmemcpy_bsrcul3loop4
520 stmdb sp!, {r4, r5, lr} /* preserve callee-saved r4/r5 and lr */
522 .Lmemcpy_bsrcul3loop16:
523 #if __BYTE_ORDER == __BIG_ENDIAN
525 ldmdb r1!, {r3-r5, r12}
526 orr lr, lr, r12, lsl #24
528 orr r12, r12, r5, lsl #24
530 orr r5, r5, r4, lsl #24
532 orr r4, r4, r3, lsl #24
/* little-endian variant (the separating #else is elided) */
535 ldmdb r1!, {r3-r5, r12}
536 orr lr, lr, r12, lsr #24
538 orr r12, r12, r5, lsr #24
540 orr r5, r5, r4, lsr #24
542 orr r4, r4, r3, lsr #24
544 stmdb r0!, {r4, r5, r12, lr}
546 bge .Lmemcpy_bsrcul3loop16
547 ldmia sp!, {r4, r5, lr} /* restore r4/r5/lr */
549 blt .Lmemcpy_bsrcul3l4
551 .Lmemcpy_bsrcul3loop4:
552 #if __BYTE_ORDER == __BIG_ENDIAN
555 orr r12, r12, r3, lsl #24
559 orr r12, r12, r3, lsr #24
563 bge .Lmemcpy_bsrcul3loop4
/* bsrcul2: source 2 bytes past a word boundary -- 16/16-bit splits */
571 blt .Lmemcpy_bsrcul2loop4
573 stmdb sp!, {r4, r5, lr}
575 .Lmemcpy_bsrcul2loop16:
576 #if __BYTE_ORDER == __BIG_ENDIAN
578 ldmdb r1!, {r3-r5, r12}
579 orr lr, lr, r12, lsl #16
580 mov r12, r12, lsr #16
581 orr r12, r12, r5, lsl #16
583 orr r5, r5, r4, lsl #16
585 orr r4, r4, r3, lsl #16
/* little-endian variant (the separating #else is elided) */
588 ldmdb r1!, {r3-r5, r12}
589 orr lr, lr, r12, lsr #16
590 mov r12, r12, lsl #16
591 orr r12, r12, r5, lsr #16
593 orr r5, r5, r4, lsr #16
595 orr r4, r4, r3, lsr #16
597 stmdb r0!, {r4, r5, r12, lr}
599 bge .Lmemcpy_bsrcul2loop16
600 ldmia sp!, {r4, r5, lr}
602 blt .Lmemcpy_bsrcul2l4
604 .Lmemcpy_bsrcul2loop4:
605 #if __BYTE_ORDER == __BIG_ENDIAN
608 orr r12, r12, r3, lsl #16
612 orr r12, r12, r3, lsr #16
616 bge .Lmemcpy_bsrcul2loop4
/* bsrcul1: source 1 byte past a word boundary -- 8/24-bit splits */
624 blt .Lmemcpy_bsrcul1loop4
626 stmdb sp!, {r4, r5, lr}
628 .Lmemcpy_bsrcul1loop32:
629 #if __BYTE_ORDER == __BIG_ENDIAN
631 ldmdb r1!, {r3-r5, r12}
632 orr lr, lr, r12, lsl #8
633 mov r12, r12, lsr #24
634 orr r12, r12, r5, lsl #8
636 orr r5, r5, r4, lsl #8
638 orr r4, r4, r3, lsl #8
/* little-endian variant (the separating #else is elided) */
641 ldmdb r1!, {r3-r5, r12}
642 orr lr, lr, r12, lsr #8
643 mov r12, r12, lsl #24
644 orr r12, r12, r5, lsr #8
646 orr r5, r5, r4, lsr #8
648 orr r4, r4, r3, lsr #8
650 stmdb r0!, {r4, r5, r12, lr}
652 bge .Lmemcpy_bsrcul1loop32
653 ldmia sp!, {r4, r5, lr}
655 blt .Lmemcpy_bsrcul1l4
657 .Lmemcpy_bsrcul1loop4:
658 #if __BYTE_ORDER == __BIG_ENDIAN
661 orr r12, r12, r3, lsl #8
665 orr r12, r12, r3, lsr #8
669 bge .Lmemcpy_bsrcul1loop4
675 #else /* THUMB1_ONLY */
677 /* This is a fairly dumb implementation for when we can't use the 32-bit code
/* NOTE(review): Thumb-1 fallback _memcpy.  This fragment is truncated --
 * the body continues past the end of the visible extraction, and most of
 * the instructions between the branches below are elided.  Visible
 * structure: an overlap test choosing forward vs. backward copy, then an
 * alignment check before the word-copy loop. */
682 .type _memcpy,%function
689 @ See if we have overlapping regions, and need to reverse the
690 @ direction of the copy
692 bls .Lmemcpy_forwards
695 bcc .Lmemcpy_backwards
702 bne .Lmemcpy_funaligned
704 bcc .Lmemcpy_funaligned
705 1: @ copy up to the first word boundary.
714 1: @ Copy aligned words