]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/uclibc/lib/contrib/uclibc/libc/string/sh/sh4/memmove.c
update
[l4.git] / l4 / pkg / uclibc / lib / contrib / uclibc / libc / string / sh / sh4 / memmove.c
1 /* memmove implementation for SH4
2  *
3  * Copyright (C) 2009 STMicroelectronics Ltd.
4  *
5  * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
6  *
7  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
8  */
9
10 #ifndef __SH_FPU_ANY__
11 #include "../../generic/memmove.c"
12 #else
13
14 #include <string.h>
15
16 #define FPSCR_SR        (1 << 20)
17 #define STORE_FPSCR(x)  __asm__ __volatile__("sts fpscr, %0" : "=r"(x))
18 #define LOAD_FPSCR(x)   __asm__ __volatile__("lds %0, fpscr" : : "r"(x))
19
/*
 * Copy len bytes from src to dest in ascending address order.
 *
 * Buffers shorter than 64 bytes are copied one byte at a time.  Longer
 * buffers first get dest aligned to 8 bytes; if src is then 8-byte aligned
 * as well, dest is advanced to a 32-byte boundary with word copies and the
 * bulk is moved 32 bytes per iteration through the FPU registers
 * dr0/dr2/dr4/dr6.  Whatever is left (or everything after the initial
 * alignment, when src is not 8-byte aligned) is finished byte by byte.
 *
 * Every loop walks upwards, which is why memmove() uses this routine for
 * the overlapping case where dest lies below src.
 */
20 static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
21 {
22         char *d = (char *)dest;
        /* The const qualifier is cast away here; s is only ever read from. */
23         char *s = (char *)src;
24
25         if (len >= 64) {
                /* Holds the caller's FPSCR so it can be restored after the FPU loop. */
26                 unsigned long fpscr;
27                 int *s1;
28                 int *d1;
29
30                 /* Align the dest to an 8-byte boundary (mask 0x7).  At most 7
                    * bytes are copied and len >= 64, so len cannot underflow. */
31                 while ((unsigned)d & 0x7) {
32                         *d++ = *s++;
33                         len--;
34                 }
35
36                 s1 = (int *)s;
37                 d1 = (int *)d;
38
39                 /* The FPU path is taken only if s is now 8-byte aligned too;
                    * otherwise nothing more happens here and the byte loop at
                    * the bottom copies the whole remainder. */
40                 if (!((unsigned)s1 & 0x7)) {
41
42                         /* Align the dest to a 32-byte boundary (the original
                            * author calls this the cache-line boundary).  d1 is
                            * already 8-byte aligned, so testing bits 0x1c is
                            * enough; at most 24 bytes are copied, 4 at a time. */
43                         while ((unsigned)d1 & 0x1c) {
44                                 *d1++ = *s1++;
45                                 len -= 4;
46                         }
47
48                         /* Save the current FPSCR, then load it with FPSCR_SR
49                          * (1 << 20) alone.  Per the original author this selects
                            * paired single-precision load/store mode, so that each
                            * fmov transfers 64 bits.
                            * NOTE(review): all other FPSCR bits are zero while the
                            * loop runs - confirm this is intended. */
50                         STORE_FPSCR(fpscr);
51                         LOAD_FPSCR(FPSCR_SR);
52
                        /* Each iteration moves 32 bytes: four loads from s1
                         * into dr0/dr2/dr4/dr6, then four stores to d1. */
53                         while (len >= 32) {
                                /* The "@%0+" operand form updates s1, hence the
                                 * read-write "+r" constraint. */
54                                 __asm__ __volatile__ ("fmov @%0+,dr0":"+r" (s1));
55                                 __asm__ __volatile__ ("fmov @%0+,dr2":"+r" (s1));
56                                 __asm__ __volatile__ ("fmov @%0+,dr4":"+r" (s1));
57                                 __asm__ __volatile__ ("fmov @%0+,dr6":"+r" (s1));
                                /* The stores do not advance d1 themselves; it is
                                 * bumped by two ints after each one.
                                 * NOTE(review): no asm statement here lists
                                 * dr0-dr6 as clobbered, so the hand-off from the
                                 * loads to the stores depends on this exact
                                 * statement order - verify before reordering. */
58                                 __asm__
59                                     __volatile__ ("fmov dr0,@%0"::"r"
60                                               (d1):"memory");
61                                 d1 += 2;
62                                 __asm__
63                                     __volatile__ ("fmov dr2,@%0"::"r"
64                                               (d1):"memory");
65                                 d1 += 2;
66                                 __asm__
67                                     __volatile__ ("fmov dr4,@%0"::"r"
68                                               (d1):"memory");
69                                 d1 += 2;
70                                 __asm__
71                                     __volatile__ ("fmov dr6,@%0"::"r"
72                                               (d1):"memory");
73                                 d1 += 2;
74                                 len -= 32;
75                         }
                        /* Put the caller's FPSCR back. */
76                         LOAD_FPSCR(fpscr);
77                 }
                /* Carry the advanced positions back to the byte pointers. */
78                 s = (char *)s1;
79                 d = (char *)d1;
80                 /* TODO (original author): other subcases could be covered here? */
81         }
82         /* Copy whatever is left (fewer than 32 bytes after the FPU loop, or
            * the full remainder when the FPU path was skipped) byte by byte. */
83         while (len > 0) {
84                 *d++ = *s++;
85                 len--;
86         }
87         return;
88 }
89
/*
 * memmove - copy len bytes from src to dest; the regions may overlap.
 *
 * Returns dest.
 *
 * Dispatch:
 *   - dest == src: nothing to copy.  Returning early also avoids calling
 *     memcpy() on fully overlapping buffers, which ISO C leaves undefined.
 *   - dest below src and the regions overlap: the copy must run from the
 *     beginning to the end, so fpu_optimised_copy_fwd() is used.
 *   - everything else (no overlap, or dest above src): memcpy().
 *
 * NOTE: using memcpy() when dest lies above an overlapping src relies on
 * the SH4 memcpy copying from the end towards the beginning, as stated by
 * the original author; that implementation is not part of this file.
 */
void *memmove(void *dest, const void *src, size_t len)
{
        unsigned long d = (unsigned long)dest;
        unsigned long s = (unsigned long)src;

        if (d == s)
                return dest;

        /* d < s makes s - d the positive gap between the two start
         * addresses; the regions overlap when that gap is below len. */
        if (d < s && (s - d) < len)
                fpu_optimised_copy_fwd(dest, src, len);
        else
                memcpy(dest, src, len);

        return dest;
}
119
120 libc_hidden_def(memmove)
121 #endif /*__SH_FPU_ANY__ */