]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/l4re-core/uclibc/lib/contrib/uclibc/libc/string/sparc/sparc64/memcpy.S
Update
[l4.git] / l4 / pkg / l4re-core / uclibc / lib / contrib / uclibc / libc / string / sparc / sparc64 / memcpy.S
1 /* Copy SIZE bytes from SRC to DEST.
2    For UltraSPARC.
3    Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by David S. Miller (davem@caip.rutgers.edu) and
6                   Jakub Jelinek (jakub@redhat.com).
7
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
12
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
17
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, see
20    <http://www.gnu.org/licenses/>.  */
21
22 #include <features.h>
23 #include <asm/asi.h>
/* Build configuration.  When XCC has not been defined before this point,
   select the configuration that defines USE_BPR, declares %g2, %g3 and %g6
   as scratch registers to the assembler, and makes XCC expand to the 64-bit
   condition codes (xcc).
   NOTE(review): presumably a 32-bit wrapper pre-defines XCC (e.g. as icc)
   before including this file -- confirm against the sparc32 sources.  */
24 #ifndef XCC
25 #define USE_BPR
26         .register       %g2, #scratch
27         .register       %g3, #scratch
28         .register       %g6, #scratch
29 #define XCC     xcc
30 #endif
/* Value written to %fprs on exit from the VIS copy path (label 207).
   In the SPARC V9 FPRS register the value 4 is the FEF (FPU enable) bit,
   so the write leaves the FPU enabled and the two dirty bits cleared.  */
31 #define FPRS_FEF        4
32
/* FREG_FROB(f1..f9): realign one 64-byte block.
   Issues eight faligndata instructions on the adjacent register pairs
   (f1,f2), (f2,f3), ... (f8,f9) and writes the eight 8-byte results to
   %f48, %f50, ... %f62.  The byte offset used by faligndata comes from the
   GSR alignment field, which the code sets with alignaddr before the
   macro is used.  The arguments are register names without the leading
   '%'.  */
33 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
34         faligndata      %f1, %f2, %f48;                         \
35         faligndata      %f2, %f3, %f50;                         \
36         faligndata      %f3, %f4, %f52;                         \
37         faligndata      %f4, %f5, %f54;                         \
38         faligndata      %f5, %f6, %f56;                         \
39         faligndata      %f6, %f7, %f58;                         \
40         faligndata      %f7, %f8, %f60;                         \
41         faligndata      %f8, %f9, %f62;
42
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one step of the
   VIS block loop (six instructions).
     - ldda loads from [%src] through %asi into the registers starting at
       %fdest (the caller sets %asi to ASI_BLK_P, and the pointers advance
       by 0x40, i.e. one 64-byte block per step);
     - %src and %dest are both advanced by 0x40;
     - %len is decremented by 0x40 and the condition codes are set;
     - if %len reached zero control transfers to jmptgt.
   The stda of %fsrc to [%dest - 0x40] sits in the branch delay slot of a
   non-annulled branch, so the store is performed whether or not the
   branch is taken.  */
43 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
44         ldda            [%src] %asi, %fdest;                    \
45         add             %src, 0x40, %src;                       \
46         add             %dest, 0x40, %dest;                     \
47         subcc           %len, 0x40, %len;                       \
48         be,pn           %xcc, jmptgt;                           \
49          stda           %fsrc, [%dest - 0x40] %asi;
50
/* LOOP_CHUNK1/2/3: MAIN_LOOP_CHUNK specialised for the three rotating
   input register banks.  Each variant stores the realigned data held in
   %f48.. and refills one bank with the next source block:
     LOOP_CHUNK1 reloads the bank starting at %f0,
     LOOP_CHUNK2 reloads the bank starting at %f16,
     LOOP_CHUNK3 reloads the bank starting at %f32.  */
51 #define LOOP_CHUNK1(src, dest, len, branch_dest)                \
52         MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
53 #define LOOP_CHUNK2(src, dest, len, branch_dest)                \
54         MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
55 #define LOOP_CHUNK3(src, dest, len, branch_dest)                \
56         MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
57
/* STORE_SYNC(dest, fsrc): store the registers starting at %fsrc to [%dest]
   through %asi and advance %dest by 0x40.  The macro itself emits no
   barrier; every use in this file follows it with an explicit
   "membar #Sync".  */
58 #define STORE_SYNC(dest, fsrc)                                  \
59         stda            %fsrc, [%dest] %asi;                    \
60         add             %dest, 0x40, %dest;
61
/* STORE_JUMP(dest, fsrc, target): like STORE_SYNC, then branch always to
   target.  The macro ends with the branch, so the instruction the caller
   writes immediately after the macro occupies the branch delay slot (in
   this file that is always "membar #Sync").  */
62 #define STORE_JUMP(dest, fsrc, target)                          \
63         stda            %fsrc, [%dest] %asi;                    \
64         add             %dest, 0x40, %dest;                     \
65         ba,pt           %xcc, target;
66
/* VISLOOP_PAD: fifteen nops appended to each alignment case of the VIS
   loop (labels 300, 310, ... 370).  The dispatch at label 203 jumps to
   300 + (index << 9), so every case has to occupy exactly 512 bytes;
   the padding makes up the difference.  Do not change the instruction
   count of a case without adjusting this macro.  */
67 #define VISLOOP_PAD nop; nop; nop; nop;                         \
68                     nop; nop; nop; nop;                         \
69                     nop; nop; nop; nop;                         \
70                     nop; nop; nop;
71
/* FINISH_VISCHUNK(dest, f0, f1, left): emit one 8-byte finishing store.
   Subtracts 8 from %left; if the result is negative, branches to label
   205 (the byte tail).  The faligndata of (%f0, %f1) into %f48 is in the
   delay slot and therefore always executes.  Otherwise %f48 is stored to
   [%dest], %dest advances by 8, and execution falls through into whatever
   follows the macro (normally the next FINISH_VISCHUNK entry).
   Note that the macro parameters f0/f1 are placeholders, not the
   registers of the same name.  */
72 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
73         subcc           %left, 8, %left;                        \
74         bl,pn           %xcc, 205f;                             \
75          faligndata     %f0, %f1, %f48;                         \
76         std             %f48, [%dest];                          \
77         add             %dest, 8, %dest;
78
/* UNEVEN_VISCHUNK(dest, f0, f1, left): finishing entry for the last double
   of a register bank, where the second faligndata operand has not been
   loaded yet.  Subtracts 8 from %left; if the result is negative,
   branches to label 205.  The fsrc1 in the delay slot always copies the
   register given as f0 into the register given as f1.  Otherwise it
   jumps (annulled branch) to the 8-byte load loop at label 204, which
   expects the carried-over double in the real %f0 -- every use in this
   file passes f0 as the f1 argument.  The dest argument is not used by
   the macro body.  */
79 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
80         subcc           %left, 8, %left;                        \
81         bl,pn           %xcc, 205f;                             \
82          fsrc1          %f0, %f1;                               \
83         ba,a,pt         %xcc, 204f;
84
85         /* Macros for non-VIS memcpy code. */
/* MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 32 bytes at
   [%src + offset] to [%dst + offset].  The source is read with four
   8-byte loads; each value is written with two 4-byte stores (low word
   at +4, then the value is shifted right by 32 and the high word is
   written at +0).
   NOTE(review): the word-sized stores suggest this is the variant for a
   destination that is only 4-byte aligned -- confirm at the use sites.
   Clobbers t0-t3.  */
86 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)         \
87         ldx             [%src + offset + 0x00], %t0;            \
88         ldx             [%src + offset + 0x08], %t1;            \
89         ldx             [%src + offset + 0x10], %t2;            \
90         ldx             [%src + offset + 0x18], %t3;            \
91         stw             %t0, [%dst + offset + 0x04];            \
92         srlx            %t0, 32, %t0;                           \
93         stw             %t0, [%dst + offset + 0x00];            \
94         stw             %t1, [%dst + offset + 0x0c];            \
95         srlx            %t1, 32, %t1;                           \
96         stw             %t1, [%dst + offset + 0x08];            \
97         stw             %t2, [%dst + offset + 0x14];            \
98         srlx            %t2, 32, %t2;                           \
99         stw             %t2, [%dst + offset + 0x10];            \
100         stw             %t3, [%dst + offset + 0x1c];            \
101         srlx            %t3, 32, %t3;                           \
102         stw             %t3, [%dst + offset + 0x18];
103
/* MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 64 bytes
   at [%src + offset] to [%dst + offset] using 8-byte loads and 8-byte
   stores, in two batches of 32 bytes (bytes 0x00-0x1f, then 0x20-0x3f).
   Both addresses must be valid for ldx/stx.  Clobbers t0-t3.  */
104 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)    \
105         ldx             [%src + offset + 0x00], %t0;            \
106         ldx             [%src + offset + 0x08], %t1;            \
107         ldx             [%src + offset + 0x10], %t2;            \
108         ldx             [%src + offset + 0x18], %t3;            \
109         stx             %t0, [%dst + offset + 0x00];            \
110         stx             %t1, [%dst + offset + 0x08];            \
111         stx             %t2, [%dst + offset + 0x10];            \
112         stx             %t3, [%dst + offset + 0x18];            \
113         ldx             [%src + offset + 0x20], %t0;            \
114         ldx             [%src + offset + 0x28], %t1;            \
115         ldx             [%src + offset + 0x30], %t2;            \
116         ldx             [%src + offset + 0x38], %t3;            \
117         stx             %t0, [%dst + offset + 0x20];            \
118         stx             %t1, [%dst + offset + 0x28];            \
119         stx             %t2, [%dst + offset + 0x30];            \
120         stx             %t3, [%dst + offset + 0x38];
121
/* MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 16 bytes that
   end at (%src - offset), i.e. addresses %src - offset - 0x10 up to
   %src - offset - 1, to the same negative offsets from %dst.  Reads with
   two 8-byte loads and writes with four 4-byte stores; t2 and t3 receive
   the high words of t0 and t1.  Clobbers t0-t3.  */
122 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)        \
123         ldx             [%src - offset - 0x10], %t0;            \
124         ldx             [%src - offset - 0x08], %t1;            \
125         stw             %t0, [%dst - offset - 0x0c];            \
126         srlx            %t0, 32, %t2;                           \
127         stw             %t2, [%dst - offset - 0x10];            \
128         stw             %t1, [%dst - offset - 0x04];            \
129         srlx            %t1, 32, %t3;                           \
130         stw             %t3, [%dst - offset - 0x08];
131
/* MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1): copy the 16 bytes that end
   at (%src - offset) to the same negative offsets from %dst, using two
   8-byte loads followed by two 8-byte stores.  Clobbers t0 and t1.  */
132 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)           \
133         ldx             [%src - offset - 0x10], %t0;            \
134         ldx             [%src - offset - 0x08], %t1;            \
135         stx             %t0, [%dst - offset - 0x10];            \
136         stx             %t1, [%dst - offset - 0x08];
137
138         /* Macros for non-VIS memmove code. */
/* RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3): counterpart of
   MOVE_BIGCHUNK that addresses downwards.  Copies the 32 bytes that end
   at (%src - offset) to the same negative offsets from %dst: all four
   8-byte loads are issued first, then each value is written with two
   4-byte stores.  Clobbers t0-t3.  */
139 #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)        \
140         ldx             [%src - offset - 0x20], %t0;            \
141         ldx             [%src - offset - 0x18], %t1;            \
142         ldx             [%src - offset - 0x10], %t2;            \
143         ldx             [%src - offset - 0x08], %t3;            \
144         stw             %t0, [%dst - offset - 0x1c];            \
145         srlx            %t0, 32, %t0;                           \
146         stw             %t0, [%dst - offset - 0x20];            \
147         stw             %t1, [%dst - offset - 0x14];            \
148         srlx            %t1, 32, %t1;                           \
149         stw             %t1, [%dst - offset - 0x18];            \
150         stw             %t2, [%dst - offset - 0x0c];            \
151         srlx            %t2, 32, %t2;                           \
152         stw             %t2, [%dst - offset - 0x10];            \
153         stw             %t3, [%dst - offset - 0x04];            \
154         srlx            %t3, 32, %t3;                           \
155         stw             %t3, [%dst - offset - 0x08];
156
/* RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 64 bytes
   that end at (%src - offset) to the same negative offsets from %dst with
   8-byte loads and stores.  The upper 32 bytes (-0x20..-0x01) are moved
   first and the lower 32 bytes (-0x40..-0x21) second, i.e. the batches
   proceed from high addresses to low.  Clobbers t0-t3.  */
157 #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)   \
158         ldx             [%src - offset - 0x20], %t0;            \
159         ldx             [%src - offset - 0x18], %t1;            \
160         ldx             [%src - offset - 0x10], %t2;            \
161         ldx             [%src - offset - 0x08], %t3;            \
162         stx             %t0, [%dst - offset - 0x20];            \
163         stx             %t1, [%dst - offset - 0x18];            \
164         stx             %t2, [%dst - offset - 0x10];            \
165         stx             %t3, [%dst - offset - 0x08];            \
166         ldx             [%src - offset - 0x40], %t0;            \
167         ldx             [%src - offset - 0x38], %t1;            \
168         ldx             [%src - offset - 0x30], %t2;            \
169         ldx             [%src - offset - 0x28], %t3;            \
170         stx             %t0, [%dst - offset - 0x40];            \
171         stx             %t1, [%dst - offset - 0x38];            \
172         stx             %t2, [%dst - offset - 0x30];            \
173         stx             %t3, [%dst - offset - 0x28];
174
/* RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 16 bytes
   starting at (%src + offset) to (%dst + offset).  Reads with two 8-byte
   loads and writes with four 4-byte stores; t2 and t3 receive the high
   words of t0 and t1.  Clobbers t0-t3.  */
175 #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)       \
176         ldx             [%src + offset + 0x00], %t0;            \
177         ldx             [%src + offset + 0x08], %t1;            \
178         stw             %t0, [%dst + offset + 0x04];            \
179         srlx            %t0, 32, %t2;                           \
180         stw             %t2, [%dst + offset + 0x00];            \
181         stw             %t1, [%dst + offset + 0x0c];            \
182         srlx            %t1, 32, %t3;                           \
183         stw             %t3, [%dst + offset + 0x08];
184
/* RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1): copy the 16 bytes starting
   at (%src + offset) to (%dst + offset) using two 8-byte loads followed
   by two 8-byte stores.  Clobbers t0 and t1.  */
185 #define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)          \
186         ldx             [%src + offset + 0x00], %t0;            \
187         ldx             [%src + offset + 0x08], %t1;            \
188         stx             %t0, [%dst + offset + 0x00];            \
189         stx             %t1, [%dst + offset + 0x08];
190
191         .text
192         .align          32
193
/* bcopy -- legacy entry point, only built when __UCLIBC_SUSV3_LEGACY__ is
   defined.
   On entry %o0 and %o1 hold the first and second pointer argument and %o2
   the length (for the traditional bcopy(src, dest, n) prototype that is
   src, dest, n).  The code swaps %o0 and %o1 (via %g3) and computes
   %o4 = %o1 - %o0 from the incoming values.
     - If %o4 >= %o2 (unsigned) it branches to label 210.
     - Otherwise, if the length is non-zero, it branches to label 220 with
       %o0 advanced by %o2 in the delay slot.
     - A zero length simply returns.
   NOTE(review): labels 210 and 220 lie outside this part of the file;
   presumably 210 is the forward copy and 220 the backward (overlap-safe)
   copy -- confirm.  */
194 #ifdef __UCLIBC_SUSV3_LEGACY__
195 ENTRY(bcopy)
196         sub             %o1, %o0, %o4                   /* IEU0         Group           */
197         mov             %o0, %g3                        /* IEU1                         */
198         cmp             %o4, %o2                        /* IEU1         Group           */
199         mov             %o1, %o0                        /* IEU0                         */
200         bgeu,pt         %XCC, 210f                      /* CTI                          */
201          mov            %g3, %o1                        /* IEU0         Group           */
/* Without USE_BPR the length is zero-extended from 32 bits (srl by 0
   clears the upper half) before the register-zero test below.  */
202 #ifndef USE_BPR
203         srl             %o2, 0, %o2                     /* IEU1                         */
204 #endif
205         brnz,pn         %o2, 220f                       /* CTI          Group           */
206          add            %o0, %o2, %o0                   /* IEU0                         */
207         retl
208          nop
209 END(bcopy)
210 #endif
211
/* VIS copy path, step 1: bring the destination up to a 64-byte boundary.
   Registers: %o0 = destination cursor, %o1 = source cursor,
   %o2 = bytes remaining.

   Label 200 is reached with the condition codes and %g2 set by the
   caller's delay-slot instruction "andcc %o0, 7, %g2" (see
   __align_cpy_4), so the first branch tests whether the destination is
   already 8-byte aligned.  Label 201 is reached with the condition codes
   and %g5 set by "andcc %o0, 0x38, %g5" (see __align_cpy_8, or the delay
   slot / label 3 below).
   NOTE(review): other entry points outside this part of the file may
   also branch here -- confirm they establish the same state.  */
212         .align          32
213 200:    be,pt           %xcc, 201f                      /* CTI                          */
214          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
/* %g2 = 8 - (dest & 7): number of bytes needed to reach 8-byte alignment.
   That count is removed from the remaining length in the delay slot.  */
215         mov             8, %g1                          /* IEU0                         */
216         sub             %g1, %g2, %g2                   /* IEU0         Group           */
217         andcc           %o0, 1, %g0                     /* IEU1                         */
218         be,pt           %icc, 2f                        /* CTI                          */
219          sub            %o2, %g2, %o2                   /* IEU0         Group           */
/* Odd destination address: copy a single byte first.  */
220 1:      ldub            [%o1], %o5                      /* Load         Group           */
221         add             %o1, 1, %o1                     /* IEU0                         */
222         add             %o0, 1, %o0                     /* IEU1                         */
223         subcc           %g2, 1, %g2                     /* IEU1         Group           */
224         be,pn           %xcc, 3f                        /* CTI                          */
225          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Copy two bytes per iteration until %g2 reaches zero.  */
226 2:      ldub            [%o1], %o5                      /* Load         Group           */
227         add             %o0, 2, %o0                     /* IEU0                         */
228         ldub            [%o1 + 1], %g3                  /* Load         Group           */
229         subcc           %g2, 2, %g2                     /* IEU1         Group           */
230         stb             %o5, [%o0 - 2]                  /* Store                        */
231         add             %o1, 2, %o1                     /* IEU0                         */
232         bne,pt          %xcc, 2b                        /* CTI          Group           */
233          stb            %g3, [%o0 - 1]                  /* Store                        */
/* Destination is now 8-byte aligned.  %g5 = dest & 0x38 is the offset of
   the destination inside its 64-byte block; zero means nothing more to
   align and control goes straight to label 202.  */
234 3:      andcc           %o0, 0x38, %g5                  /* IEU1         Group           */
235 201:    be,pt           %icc, 202f                      /* CTI                          */
236          mov            64, %g1                         /* IEU0                         */
/* %g5 = 64 - (dest & 0x38): bytes to copy to reach the 64-byte boundary.
   alignaddr puts the source address rounded down to 8 bytes into %g1 and
   records the source's byte offset for the faligndata instructions.  */
237         fmovd           %f0, %f2                        /* FPU                          */
238         sub             %g1, %g5, %g5                   /* IEU0         Group           */
239         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
240         ldd             [%g1], %f4                      /* Load         Group           */
241         sub             %o2, %g5, %o2                   /* IEU0                         */
/* Two-way unrolled loop: each half loads the next source double,
   combines it with the previous one via faligndata and stores 8 aligned
   bytes.  The halves alternate the roles of %f4 and %f6.  */
242 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
243         add             %g1, 0x8, %g1                   /* IEU0         Group           */
244         subcc           %g5, 8, %g5                     /* IEU1                         */
245         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
246         std             %f0, [%o0]                      /* Store                        */
247         add             %o1, 8, %o1                     /* IEU0         Group           */
248         be,pn           %xcc, 202f                      /* CTI                          */
249          add            %o0, 8, %o0                     /* IEU1                         */
250         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
251         add             %g1, 8, %g1                     /* IEU0                         */
252         subcc           %g5, 8, %g5                     /* IEU1                         */
253         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
254         std             %f0, [%o0]                      /* Store                        */
255         add             %o1, 8, %o1                     /* IEU0                         */
256         bne,pt          %xcc, 1b                        /* CTI          Group           */
257          add            %o0, 8, %o0                     /* IEU0                         */
/* VIS copy path, step 2: set up the block loop and dispatch into it.
   On entry %o0 = destination cursor, %o1 = source cursor, %o2 = bytes
   remaining.  Values computed here (len = %o2 on entry, src = %o1 on
   entry):
     %asi = ASI_BLK_P, used by every ldda/stda of the block loop;
     %g6  = (len - 0x40) & ~0x3f, then reduced by 0x80: byte counter for
            the block loop (MAIN_LOOP_CHUNK counts it down by 0x40);
     %g2  = (src >> 3) & 7, then shifted left by 9: selects one of the
            eight 512-byte cases that start at label 300;
     %g3  = ((len - %g6) & ~7) - 0x10 (using %g6 before the 0x80
            adjustment): counter for the 8-byte finishing stores;
     %o2  = len - %g6 - %g3: bytes left for the byte loop at label 206;
     %g1  = src + %g6 + %g3: source address for that byte loop (copied
            to %o1 at label 205);
     %o1  = src rounded down to 64 bytes; three blocks are preloaded from
            it into %f0.., %f16.. and %f32.., and it is advanced by 0xc0
            in the jmpl delay slot.
   alignaddr is executed only for its side effect of recording the
   source byte offset (its result goes to %g0).
   NOTE(review): the 0x80 subtracted from %g6 presumably accounts for the
   two blocks that are drained after the loop exits -- confirm.  */
258 202:    membar    #LoadStore | #StoreStore | #StoreLoad /* LSU          Group           */
259         wr              %g0, ASI_BLK_P, %asi            /* LSU          Group           */
260         subcc           %o2, 0x40, %g6                  /* IEU1         Group           */
261         mov             %o1, %g1                        /* IEU0                         */
262         andncc          %g6, (0x40 - 1), %g6            /* IEU1         Group           */
263         srl             %g1, 3, %g2                     /* IEU0                         */
264         sub             %o2, %g6, %g3                   /* IEU0         Group           */
265         andn            %o1, (0x40 - 1), %o1            /* IEU1                         */
266         and             %g2, 7, %g2                     /* IEU0         Group           */
267         andncc          %g3, 0x7, %g3                   /* IEU1                         */
268         fmovd           %f0, %f2                        /* FPU                          */
269         sub             %g3, 0x10, %g3                  /* IEU0         Group           */
270         sub             %o2, %g6, %o2                   /* IEU1                         */
271         alignaddr       %g1, %g0, %g0                   /* GRU          Group           */
272         add             %g1, %g6, %g1                   /* IEU0         Group           */
273         subcc           %o2, %g3, %o2                   /* IEU1                         */
274         ldda            [%o1 + 0x00] %asi, %f0          /* LSU          Group           */
275         add             %g1, %g3, %g1                   /* IEU0                         */
276         ldda            [%o1 + 0x40] %asi, %f16         /* LSU          Group           */
277         sub             %g6, 0x80, %g6                  /* IEU0                         */
278         ldda            [%o1 + 0x80] %asi, %f32         /* LSU          Group           */
279                                                         /* Clk1         Group 8-(       */
280                                                         /* Clk2         Group 8-(       */
281                                                         /* Clk3         Group 8-(       */
282                                                         /* Clk4         Group 8-(       */
/* Position-independent computed jump: read the PC at label 203, add the
   distance to label 300 and the case offset (%g2 << 9).  */
283 203:    rd              %pc, %g5                        /* PDU          Group 8-(       */
284         addcc           %g5, %lo(300f - 203b), %g5      /* IEU1         Group           */
285         sll             %g2, 9, %g2                     /* IEU0                         */
286         jmpl            %g5 + %g2, %g0                  /* CTI          Group brk forced*/
287          addcc          %o1, 0xc0, %o1                  /* IEU1         Group           */
288
/* VIS copy path, step 3: the block loop, in eight copies.
   Labels 300, 310, ... 370 are the entry points for a source whose first
   needed double is number 0, 1, ... 7 of its 64-byte block; the dispatch
   at label 203 jumps to 300 + (n << 9), so each case must stay exactly
   512 bytes long (VISLOOP_PAD supplies the filler).

   Structure of every case 3n0:
     - three FREG_FROB + LOOP_CHUNKx steps rotate through the register
       banks %f0.., %f16.. and %f32..: FREG_FROB realigns 64 bytes into
       %f48.., LOOP_CHUNKx stores them and reloads the bank that was just
       consumed;
     - the trailing "b,pt 3n0b+4" jumps back to the second instruction of
       the case and executes the first faligndata of the next round in
       its delay slot;
     - when the block counter %g6 reaches zero inside LOOP_CHUNK1/2/3,
       control goes to 3n1/3n2/3n3.  These exits drain the two banks that
       are still loaded (FREG_FROB + STORE_SYNC, then FREG_FROB +
       STORE_JUMP) and branch into the finishing table at the 4xx entry
       that corresponds to the first double not yet stored.  The
       "membar #Sync" written after STORE_JUMP is that branch's delay
       slot.  */
289         .align          512             /* OK, here comes the fun part... */
290 300:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  LOOP_CHUNK1(o1, o0, g6, 301f)
291         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  LOOP_CHUNK2(o1, o0, g6, 302f)
292         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   LOOP_CHUNK3(o1, o0, g6, 303f)
293         b,pt            %xcc, 300b+4; faligndata %f0, %f2, %f48
294 301:    FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_SYNC(o0, f48) membar #Sync
295         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_JUMP(o0, f48, 400f) membar #Sync
296 302:    FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_SYNC(o0, f48) membar #Sync
297         FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_JUMP(o0, f48, 416f) membar #Sync
298 303:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_SYNC(o0, f48) membar #Sync
299         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_JUMP(o0, f48, 432f) membar #Sync
300         VISLOOP_PAD
301 310:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  LOOP_CHUNK1(o1, o0, g6, 311f)
302         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  LOOP_CHUNK2(o1, o0, g6, 312f)
303         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   LOOP_CHUNK3(o1, o0, g6, 313f)
304         b,pt            %xcc, 310b+4; faligndata %f2, %f4, %f48
305 311:    FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_SYNC(o0, f48) membar #Sync
306         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_JUMP(o0, f48, 402f) membar #Sync
307 312:    FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_SYNC(o0, f48) membar #Sync
308         FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_JUMP(o0, f48, 418f) membar #Sync
309 313:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_SYNC(o0, f48) membar #Sync
310         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_JUMP(o0, f48, 434f) membar #Sync
311         VISLOOP_PAD
312 320:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  LOOP_CHUNK1(o1, o0, g6, 321f)
313         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  LOOP_CHUNK2(o1, o0, g6, 322f)
314         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   LOOP_CHUNK3(o1, o0, g6, 323f)
315         b,pt            %xcc, 320b+4; faligndata %f4, %f6, %f48
316 321:    FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_SYNC(o0, f48) membar #Sync
317         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_JUMP(o0, f48, 404f) membar #Sync
318 322:    FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_SYNC(o0, f48) membar #Sync
319         FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_JUMP(o0, f48, 420f) membar #Sync
320 323:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_SYNC(o0, f48) membar #Sync
321         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_JUMP(o0, f48, 436f) membar #Sync
322         VISLOOP_PAD
323 330:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  LOOP_CHUNK1(o1, o0, g6, 331f)
324         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  LOOP_CHUNK2(o1, o0, g6, 332f)
325         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   LOOP_CHUNK3(o1, o0, g6, 333f)
326         b,pt            %xcc, 330b+4; faligndata %f6, %f8, %f48
327 331:    FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_SYNC(o0, f48) membar #Sync
328         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_JUMP(o0, f48, 406f) membar #Sync
329 332:    FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_SYNC(o0, f48) membar #Sync
330         FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_JUMP(o0, f48, 422f) membar #Sync
331 333:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_SYNC(o0, f48) membar #Sync
332         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_JUMP(o0, f48, 438f) membar #Sync
333         VISLOOP_PAD
334 340:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  LOOP_CHUNK1(o1, o0, g6, 341f)
335         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  LOOP_CHUNK2(o1, o0, g6, 342f)
336         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   LOOP_CHUNK3(o1, o0, g6, 343f)
337         b,pt            %xcc, 340b+4; faligndata %f8, %f10, %f48
338 341:    FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_SYNC(o0, f48) membar #Sync
339         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_JUMP(o0, f48, 408f) membar #Sync
340 342:    FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_SYNC(o0, f48) membar #Sync
341         FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_JUMP(o0, f48, 424f) membar #Sync
342 343:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_SYNC(o0, f48) membar #Sync
343         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_JUMP(o0, f48, 440f) membar #Sync
344         VISLOOP_PAD
345 350:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  LOOP_CHUNK1(o1, o0, g6, 351f)
346         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  LOOP_CHUNK2(o1, o0, g6, 352f)
347         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  LOOP_CHUNK3(o1, o0, g6, 353f)
348         b,pt            %xcc, 350b+4; faligndata %f10, %f12, %f48
349 351:    FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_SYNC(o0, f48) membar #Sync
350         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_JUMP(o0, f48, 410f) membar #Sync
351 352:    FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_SYNC(o0, f48) membar #Sync
352         FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_JUMP(o0, f48, 426f) membar #Sync
353 353:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_SYNC(o0, f48) membar #Sync
354         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_JUMP(o0, f48, 442f) membar #Sync
355         VISLOOP_PAD
356 360:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  LOOP_CHUNK1(o1, o0, g6, 361f)
357         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  LOOP_CHUNK2(o1, o0, g6, 362f)
358         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  LOOP_CHUNK3(o1, o0, g6, 363f)
359         b,pt            %xcc, 360b+4; faligndata %f12, %f14, %f48
360 361:    FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_SYNC(o0, f48) membar #Sync
361         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_JUMP(o0, f48, 412f) membar #Sync
362 362:    FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_SYNC(o0, f48) membar #Sync
363         FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_JUMP(o0, f48, 428f) membar #Sync
364 363:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_SYNC(o0, f48) membar #Sync
365         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_JUMP(o0, f48, 444f) membar #Sync
366         VISLOOP_PAD
367 370:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  LOOP_CHUNK1(o1, o0, g6, 371f)
368         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  LOOP_CHUNK2(o1, o0, g6, 372f)
369         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  LOOP_CHUNK3(o1, o0, g6, 373f)
370         b,pt            %xcc, 370b+4; faligndata %f14, %f16, %f48
371 371:    FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_SYNC(o0, f48) membar #Sync
372         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_JUMP(o0, f48, 414f) membar #Sync
373 372:    FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_SYNC(o0, f48) membar #Sync
374         FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_JUMP(o0, f48, 430f) membar #Sync
375 373:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_SYNC(o0, f48) membar #Sync
376         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_JUMP(o0, f48, 446f) membar #Sync
377         VISLOOP_PAD
/* VIS copy path, step 4: finishing table.
   Entry 4xx (xx = 00, 02, ... 46) is used when the first double not yet
   stored lives in register %fxx; the exits of the block loop jump to the
   matching entry.  Each FINISH_VISCHUNK entry stores one realigned
   double, decrements the 8-byte counter %g3 and falls through to the
   next entry; when %g3 goes negative it leaves for label 205.  The last
   double of each bank (entries 414, 430, 446) has no loaded successor,
   so UNEVEN_VISCHUNK copies it into %f0 and continues in the load loop
   at label 204.  */
378 400:    FINISH_VISCHUNK(o0, f0,  f2,  g3)
379 402:    FINISH_VISCHUNK(o0, f2,  f4,  g3)
380 404:    FINISH_VISCHUNK(o0, f4,  f6,  g3)
381 406:    FINISH_VISCHUNK(o0, f6,  f8,  g3)
382 408:    FINISH_VISCHUNK(o0, f8,  f10, g3)
383 410:    FINISH_VISCHUNK(o0, f10, f12, g3)
384 412:    FINISH_VISCHUNK(o0, f12, f14, g3)
385 414:    UNEVEN_VISCHUNK(o0, f14, f0,  g3)
386 416:    FINISH_VISCHUNK(o0, f16, f18, g3)
387 418:    FINISH_VISCHUNK(o0, f18, f20, g3)
388 420:    FINISH_VISCHUNK(o0, f20, f22, g3)
389 422:    FINISH_VISCHUNK(o0, f22, f24, g3)
390 424:    FINISH_VISCHUNK(o0, f24, f26, g3)
391 426:    FINISH_VISCHUNK(o0, f26, f28, g3)
392 428:    FINISH_VISCHUNK(o0, f28, f30, g3)
393 430:    UNEVEN_VISCHUNK(o0, f30, f0,  g3)
394 432:    FINISH_VISCHUNK(o0, f32, f34, g3)
395 434:    FINISH_VISCHUNK(o0, f34, f36, g3)
396 436:    FINISH_VISCHUNK(o0, f36, f38, g3)
397 438:    FINISH_VISCHUNK(o0, f38, f40, g3)
398 440:    FINISH_VISCHUNK(o0, f40, f42, g3)
399 442:    FINISH_VISCHUNK(o0, f42, f44, g3)
400 444:    FINISH_VISCHUNK(o0, f44, f46, g3)
401 446:    UNEVEN_VISCHUNK(o0, f46, f0,  g3)
/* VIS copy path, step 5: tail handling and return.

   204: 8-byte loop used once the preloaded register banks are exhausted.
        Expects the previous source double in %f0 and %o1 pointing at the
        next 8-byte source word.  Two-way unrolled, alternating %f2 and
        %f0 as the newly loaded double; each half stores one realigned
        double and decrements %g3 by 8.  The loop is left for label 205
        as soon as %g3 becomes negative (the store of the current half
        has already been issued at that point).  */
402 204:    ldd             [%o1], %f2                      /* Load         Group           */
403         add             %o1, 8, %o1                     /* IEU0                         */
404         subcc           %g3, 8, %g3                     /* IEU1                         */
405         faligndata      %f0, %f2, %f8                   /* GRU          Group           */
406         std             %f8, [%o0]                      /* Store                        */
407         bl,pn           %xcc, 205f                      /* CTI                          */
408          add            %o0, 8, %o0                     /* IEU0         Group           */
409         ldd             [%o1], %f0                      /* Load         Group           */
410         add             %o1, 8, %o1                     /* IEU0                         */
411         subcc           %g3, 8, %g3                     /* IEU1                         */
412         faligndata      %f2, %f0, %f8                   /* GRU          Group           */
413         std             %f8, [%o0]                      /* Store                        */
414         bge,pt          %xcc, 204b                      /* CTI                          */
415          add            %o0, 8, %o0                     /* IEU0         Group           */
/* 205: byte tail.  %o2 holds the number of leftover bytes and %g1 the
        exact source address they start at (both computed at label 202);
        %o1 is reloaded from %g1 in the delay slot because the block code
        used %o1 as a rounded-down block pointer.  Nothing to do when
        %o2 is zero.  */
416 205:    brz,pt          %o2, 207f                       /* CTI          Group           */
417          mov            %g1, %o1                        /* IEU0                         */
418 206:    ldub            [%o1], %g5                      /* LOAD                         */
419         add             %o1, 1, %o1                     /* IEU0                         */
420         add             %o0, 1, %o0                     /* IEU1                         */
421         subcc           %o2, 1, %o2                     /* IEU1                         */
422         bne,pt          %xcc, 206b                      /* CTI                          */
423          stb            %g5, [%o0 - 1]                  /* Store        Group           */
/* 207: order the preceding stores, write FPRS_FEF to %fprs and return the
        original destination pointer, which the entry point saved in
        %g4.  */
424 207:    membar          #StoreLoad | #StoreStore        /* LSU          Group           */
425         wr              %g0, FPRS_FEF, %fprs
426         retl
427          mov            %g4, %o0
428
/* 208: byte-wise copy for short lengths (the visible callers branch here
   when the length is 15 or less).
   On entry %o0 = destination, %o1 = source, %o2 = length (non-zero),
   %g4 = value to return.

   The first ldub is both the delay slot of the parity branch and the
   first instruction of the odd-length path:
     - even length: the branch goes to "2f+4", i.e. past the ldub at
       label 2, because the delay slot has already loaded that byte;
     - odd length: one byte is copied, and if that was the only byte
       control goes to label 209.
   The loop at label 2 then copies two bytes per iteration until %o2
   reaches zero.  The loop tests for equality with zero only, so a zero
   length on entry is not handled here.  */
429 208:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
430         be,pt           %icc, 2f+4                      /* CTI                          */
431 1:       ldub           [%o1], %g5                      /* LOAD         Group           */
432         add             %o1, 1, %o1                     /* IEU0                         */
433         add             %o0, 1, %o0                     /* IEU1                         */
434         subcc           %o2, 1, %o2                     /* IEU1         Group           */
435         be,pn           %xcc, 209f                      /* CTI                          */
436          stb            %g5, [%o0 - 1]                  /* Store                        */
437 2:      ldub            [%o1], %g5                      /* LOAD         Group           */
438         add             %o0, 2, %o0                     /* IEU0                         */
439         ldub            [%o1 + 1], %o5                  /* LOAD         Group           */
440         add             %o1, 2, %o1                     /* IEU0                         */
441         subcc           %o2, 2, %o2                     /* IEU1         Group           */
442         stb             %g5, [%o0 - 2]                  /* Store                        */
443         bne,pt          %xcc, 2b                        /* CTI                          */
444          stb            %o5, [%o0 - 1]                  /* Store                        */
/* 209: return the saved destination pointer.  */
445 209:    retl
446          mov            %g4, %o0
447
448 #ifdef USE_BPR
449
450         /* void *__align_cpy_4(void *dest, void *src, size_t n)
451          * SPARC v9 SYSV ABI
452          * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3))
453          */
454
455         .align          32
456 ENTRY(__align_cpy_4)
            /* Keep dest in %g4; the return paths in this excerpt copy it     */
            /* back into %o0.                                                 */
457         mov             %o0, %g4                        /* IEU0         Group           */
            /* n <= 15: byte loop at 208.                                     */
458         cmp             %o2, 15                         /* IEU1                         */
459         bleu,pn         %xcc, 208b                      /* CTI                          */
460          cmp            %o2, (64 * 6)                   /* IEU1         Group           */
            /* n >= 64*6: label 200, defined earlier in the file (outside     */
            /* this excerpt).  The delay slot leaves dest & 7 in %g2.         */
461         bgeu,pn         %xcc, 200b                      /* CTI                          */
462          andcc          %o0, 7, %g2                     /* IEU1         Group           */
            /* Otherwise join memcpy at 216.  The delay slot tests bit 2 of   */
            /* src, which is the condition the branch at 216 consumes.        */
463         ba,pt           %xcc, 216f                      /* CTI                          */
464          andcc          %o1, 4, %g0                     /* IEU1         Group           */
465 END(__align_cpy_4)
466
467         /* void *__align_cpy_8(void *dest, void *src, size_t n)
468          * SPARC v9 SYSV ABI
469          * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7))
470          */
471
472         .align          32
473 ENTRY(__align_cpy_8)
            /* Keep dest in %g4; the return paths in this excerpt copy it     */
            /* back into %o0.                                                 */
474         mov             %o0, %g4                        /* IEU0         Group           */
            /* n <= 15: byte loop at 208.                                     */
475         cmp             %o2, 15                         /* IEU1                         */
476         bleu,pn         %xcc, 208b                      /* CTI                          */
477          cmp            %o2, (64 * 6)                   /* IEU1         Group           */
            /* n >= 64*6: label 201, defined earlier in the file (outside     */
            /* this excerpt).  The delay slot leaves dest & 0x38 in %g5.      */
478         bgeu,pn         %xcc, 201b                      /* CTI                          */
479          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
            /* %g6 = n & -128, the byte count for the 128-byte loop.          */
480         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* Non-zero: enter the loop at 82 one instruction in.  The        */
            /* annulled delay slot runs only when the branch is taken.        */
            /* NOTE(review): this ldx presumably stands in for the skipped    */
            /* first instruction of MOVE_BIGALIGNCHUNK; the macro body is     */
            /* outside this excerpt - confirm.                                */
481         bne,a,pt        %xcc, 82f + 4                   /* CTI                          */
482          ldx            [%o1], %g1                      /* Load                         */
            /* Zero: join the memcpy tail at 41 with %g6 = n & 0x70.          */
483         ba,pt           %xcc, 41f                       /* CTI          Group           */
484          andcc          %o2, 0x70, %g6                  /* IEU1                         */
485 END(__align_cpy_8)
486
487         /* void *__align_cpy_16(void *dest, void *src, size_t n)
488          * SPARC v9 SYSV ABI
489          * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15))
490          */
491
492         .align          32
493 ENTRY(__align_cpy_16)
            /* Keep dest in %g4; the return paths in this excerpt copy it     */
            /* back into %o0.  Unlike __align_cpy_4 and __align_cpy_8 there   */
            /* is no n <= 15 test here; by the contract above n is a          */
            /* non-zero multiple of 16.                                       */
494         mov             %o0, %g4                        /* IEU0         Group           */
            /* n >= 64*6: label 201, defined earlier in the file (outside     */
            /* this excerpt).  The delay slot leaves dest & 0x38 in %g5.      */
495         cmp             %o2, (64 * 6)                   /* IEU1                         */
496         bgeu,pn         %xcc, 201b                      /* CTI                          */
497          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
            /* %g6 = n & -128, the byte count for the 128-byte loop.          */
498         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* Non-zero: enter the loop at 82 one instruction in.  The        */
            /* annulled delay slot runs only when the branch is taken.        */
            /* NOTE(review): this ldx presumably stands in for the skipped    */
            /* first instruction of MOVE_BIGALIGNCHUNK; the macro body is     */
            /* outside this excerpt - confirm.                                */
499         bne,a,pt        %xcc, 82f + 4                   /* CTI                          */
500          ldx            [%o1], %g1                      /* Load                         */
            /* Zero: join the memcpy tail at 41 with %g6 = n & 0x70.          */
501         ba,pt           %xcc, 41f                       /* CTI          Group           */
502          andcc          %o2, 0x70, %g6                  /* IEU1                         */
503 END(__align_cpy_16)
504
505 #endif
506
/* memcpy: %o0 = dest, %o1 = src, %o2 = n.  The original dest is kept in
 * %g4 and moved back into %o0 in the delay slot of each retl.
 * This first part dispatches on length and relative alignment, copies
 * the few head bytes needed to bring src to an 8-byte boundary, and
 * runs the main loop that advances both pointers by 128 per iteration.
 * Label 218 is also the entry used by memmove for a forward copy.
 */
507         .align          32
508 ENTRY(memcpy)
509 210:
510 #ifndef USE_BPR
            /* Without USE_BPR only the low 32 bits of n are used             */
            /* (a logical shift right by 0 zero-extends the word).            */
511         srl             %o2, 0, %o2                     /* IEU1         Group           */
512 #endif  
            /* n == 0: return through 209.  The delay slot is not annulled,   */
            /* so %g4 already holds dest when 209 copies it back.             */
513         brz,pn          %o2, 209b                       /* CTI          Group           */
514          mov            %o0, %g4                        /* IEU0                         */
            /* n <= 15: byte loop at 208.                                     */
515 218:    cmp             %o2, 15                         /* IEU1         Group           */
516         bleu,pn         %xcc, 208b                      /* CTI                          */
517          cmp            %o2, (64 * 6)                   /* IEU1         Group           */
            /* n >= 64*6: label 200, defined earlier in the file (outside     */
            /* this excerpt).  The delay slot leaves dest & 7 in %g2, which   */
            /* the 212 path reads.                                            */
518         bgeu,pn         %xcc, 200b                      /* CTI                          */
519          andcc          %o0, 7, %g2                     /* IEU1         Group           */
            /* (dest - src) & 3 != 0: src and dest can never be word aligned  */
            /* at the same time, so take the 212 path.                        */
520         sub             %o0, %o1, %g5                   /* IEU0                         */
521         andcc           %g5, 3, %o5                     /* IEU1         Group           */
522         bne,pn          %xcc, 212f                      /* CTI                          */
523          andcc          %o1, 3, %g0                     /* IEU1         Group           */
            /* src already 4-byte aligned: go to 216.  The annulled delay     */
            /* slot (run only when taken) tests bit 2 of src for 216.         */
524         be,a,pt         %xcc, 216f                      /* CTI                          */
525          andcc          %o1, 4, %g0                     /* IEU1         Group           */
            /* src even: skip the single byte and copy a halfword at 4.       */
            /* The delay slot tests bit 1 of src; that result is still live   */
            /* at the bne below because the add/sub/ldub/stb in between do    */
            /* not write the condition codes.                                 */
526         andcc           %o1, 1, %g0                     /* IEU1         Group           */
527         be,pn           %xcc, 4f                        /* CTI                          */
528          andcc          %o1, 2, %g0                     /* IEU1         Group           */
            /* src odd: copy one byte.                                        */
529         ldub            [%o1], %g2                      /* Load         Group           */
530         add             %o1, 1, %o1                     /* IEU0                         */
531         add             %o0, 1, %o0                     /* IEU1                         */
532         sub             %o2, 1, %o2                     /* IEU0         Group           */
            /* Bit 1 of the original odd src was set: src is now 4-byte       */
            /* aligned, so the halfword step is skipped.                      */
533         bne,pn          %xcc, 5f                        /* CTI          Group           */
534          stb            %g2, [%o0 - 1]                  /* Store                        */
            /* Copy one halfword.                                             */
535 4:      lduh            [%o1], %g2                      /* Load         Group           */
536         add             %o1, 2, %o1                     /* IEU0                         */
537         add             %o0, 2, %o0                     /* IEU1                         */
538         sub             %o2, 2, %o2                     /* IEU0                         */
539         sth             %g2, [%o0 - 2]                  /* Store        Group + bubble  */
            /* src is 4-byte aligned here; test bit 2 of src.                 */
540 5:      andcc           %o1, 4, %g0                     /* IEU1                         */
            /* Bit 2 clear: src is 8-byte aligned, skip the word copy.        */
            /* %g6 = n & -128 is computed in the annulled delay slot when     */
            /* the branch is taken, or after the word copy otherwise.         */
541 216:    be,a,pn         %xcc, 2f                        /* CTI                          */
542          andcc          %o2, -128, %g6                  /* IEU1         Group           */
            /* Copy one word.                                                 */
543         lduw            [%o1], %g5                      /* Load         Group           */
544         add             %o1, 4, %o1                     /* IEU0                         */
545         add             %o0, 4, %o0                     /* IEU1                         */
546         sub             %o2, 4, %o2                     /* IEU0         Group           */
547         stw             %g5, [%o0 - 4]                  /* Store                        */
548         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* %g6 == 0: no whole 128-byte chunk, go to the tail at 215.      */
549 2:      be,pn           %xcc, 215f                      /* CTI                          */
550          andcc          %o0, 4, %g0                     /* IEU1         Group           */
            /* Bit 2 of dest clear: use the loop at 82, entered one           */
            /* instruction in.  The delay slot of this branch is the first    */
            /* instruction of the MOVE_BIGCHUNK expansion at 5.               */
            /* NOTE(review): presumably both macros begin with the same       */
            /* instruction; their bodies are outside this excerpt - confirm.  */
551         be,pn           %xcc, 82f + 4                   /* CTI          Group           */
            /* Main loop: four MOVE_BIGCHUNK expansions at offsets 0x00,      */
            /* 0x20, 0x40 and 0x60, then both pointers advance by 128.        */
552 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
553         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
554         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
555         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
            /* Repeat until the 128-byte byte count in %g6 reaches zero.      */
556 35:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
557         add             %o1, 128, %o1                   /* IEU0                         */
558         bne,pt          %xcc, 5b                        /* CTI                          */
559          add            %o0, 128, %o0                   /* IEU0         Group           */
/* 215 / 41: tail of the forward copy for the remaining n & 0x7f bytes.
 * Bits 4-6 of n are handled by a computed jump into the MOVE_LASTCHUNK
 * sequence; bits 3, 2, 1 and 0 are then handled one at a time.
 * The 8-byte step stores its data as two 32-bit words.
 */
            /* %g6 = n & 0x70; zero means no 16-byte pieces, go to 80.        */
            /* The delay slot tests bit 3 of n for the branch at 80.          */
560 215:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
561 41:     be,pn           %xcc, 80f                       /* CTI                          */
562          andcc          %o2, 8, %g0                     /* IEU1         Group           */
563                                                         /* Clk1 8-(                     */
564                                                         /* Clk2 8-(                     */
565                                                         /* Clk3 8-(                     */
566                                                         /* Clk4 8-(                     */
            /* Advance src and dest by %g6, then jump to                      */
            /* (address of 80) - 2 * %g6, so that only the last %g6 / 16      */
            /* expansions below are executed.                                 */
            /* NOTE(review): this assumes each MOVE_LASTCHUNK expansion is    */
            /* 32 bytes of code per 16 bytes copied and leaves the condition  */
            /* codes alone; the macro body is outside this excerpt - confirm. */
567 79:     rd              %pc, %o5                        /* PDU          Group           */
568         sll             %g6, 1, %g5                     /* IEU0         Group           */
569         add             %o1, %g6, %o1                   /* IEU1                         */
570         sub             %o5, %g5, %o5                   /* IEU0         Group           */
571         jmpl            %o5 + %lo(80f - 79b), %g0       /* CTI          Group brk forced*/
572          add            %o0, %g6, %o0                   /* IEU0         Group           */
573 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
574         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
575         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
576         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
577         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
578         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
579         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
            /* Bit 3 of n clear: skip the 8-byte step.  Each delay slot in    */
            /* this chain tests the next lower bit of n for the next branch.  */
580 80:     be,pt           %xcc, 81f                       /* CTI                          */
581          andcc          %o2, 4, %g0                     /* IEU1                         */
            /* Copy 8 bytes: one 64-bit load, stored as the low word at       */
            /* [%o0 - 4] and, after the shift, the high word at [%o0 - 8].    */
582         ldx             [%o1], %g2                      /* Load         Group           */
583         add             %o0, 8, %o0                     /* IEU0                         */
584         stw             %g2, [%o0 - 0x4]                /* Store        Group           */
585         add             %o1, 8, %o1                     /* IEU1                         */
586         srlx            %g2, 32, %g2                    /* IEU0         Group           */
587         stw             %g2, [%o0 - 0x8]                /* Store                        */
            /* Bit 2 of n set: copy one word.                                 */
588 81:     be,pt           %xcc, 1f                        /* CTI                          */
589          andcc          %o2, 2, %g0                     /* IEU1         Group           */
590         lduw            [%o1], %g2                      /* Load         Group           */
591         add             %o1, 4, %o1                     /* IEU0                         */
592         stw             %g2, [%o0]                      /* Store        Group           */
593         add             %o0, 4, %o0                     /* IEU0                         */
            /* Bit 1 of n set: copy one halfword.                             */
594 1:      be,pt           %xcc, 1f                        /* CTI                          */
595          andcc          %o2, 1, %g0                     /* IEU1         Group           */
596         lduh            [%o1], %g2                      /* Load         Group           */
597         add             %o1, 2, %o1                     /* IEU0                         */
598         sth             %g2, [%o0]                      /* Store        Group           */
599         add             %o0, 2, %o0                     /* IEU0                         */
            /* Bit 0 of n set: copy the final byte.                           */
600 1:      be,pt           %xcc, 211f                      /* CTI                          */
601          nop                                            /* IEU1                         */
602         ldub            [%o1], %g2                      /* Load         Group           */
603         stb             %g2, [%o0]                      /* Store        Group + bubble  */
            /* Return; the delay slot puts the saved %g4 back into %o0.       */
604 211:    retl
605          mov            %g4, %o0
606
/* 82: 128-byte loop and tail reached through "82f + 4" from the entry
 * code when bit 2 of dest is clear, and from __align_cpy_8 and
 * __align_cpy_16.  The tail differs from the one at 215 in that the
 * 8-byte step uses a single 64-bit store.
 */
            /* Two MOVE_BIGALIGNCHUNK expansions at offsets 0x00 and 0x40,    */
            /* then both pointers advance by 128 (macro body is outside       */
            /* this excerpt).                                                 */
607 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
608         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
            /* Repeat until the 128-byte byte count in %g6 reaches zero.      */
609 37:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
610         add             %o1, 128, %o1                   /* IEU0                         */
611         bne,pt          %xcc, 82b                       /* CTI                          */
612          add            %o0, 128, %o0                   /* IEU0         Group           */
            /* %g6 = n & 0x70; zero means no 16-byte pieces, go to 84.        */
            /* The delay slot tests bit 3 of n for the branch at 84.          */
613         andcc           %o2, 0x70, %g6                  /* IEU1                         */
614         be,pn           %xcc, 84f                       /* CTI                          */
615          andcc          %o2, 8, %g0                     /* IEU1         Group           */
616                                                         /* Clk1 8-(                     */
617                                                         /* Clk2 8-(                     */
618                                                         /* Clk3 8-(                     */
619                                                         /* Clk4 8-(                     */
            /* Advance src and dest by %g6, then jump to                      */
            /* (address of 84) - %g6, so that only the last %g6 / 16          */
            /* expansions below are executed.                                 */
            /* NOTE(review): this assumes each MOVE_LASTALIGNCHUNK expansion  */
            /* is 16 bytes of code per 16 bytes copied and leaves the         */
            /* condition codes alone; macro body not shown - confirm.         */
620 83:     rd              %pc, %o5                        /* PDU          Group           */
621         add             %o1, %g6, %o1                   /* IEU0         Group           */
622         sub             %o5, %g6, %o5                   /* IEU1                         */
623         jmpl            %o5 + %lo(84f - 83b), %g0       /* CTI          Group brk forced*/
624          add            %o0, %g6, %o0                   /* IEU0         Group           */
625 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
626         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
627         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
628         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
629         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
630         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
631         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
            /* Bit 3 of n set: copy 8 bytes with one 64-bit load and store.   */
            /* Each delay slot in this chain tests the next lower bit of n.   */
632 84:     be,pt           %xcc, 85f                       /* CTI          Group           */
633          andcc          %o2, 4, %g0                     /* IEU1                         */
634         ldx             [%o1], %g2                      /* Load         Group           */
635         add             %o0, 8, %o0                     /* IEU0                         */
636         add             %o1, 8, %o1                     /* IEU0         Group           */
637         stx             %g2, [%o0 - 0x8]                /* Store                        */
            /* Bit 2 of n set: copy one word.                                 */
638 85:     be,pt           %xcc, 1f                        /* CTI                          */
639          andcc          %o2, 2, %g0                     /* IEU1         Group           */
640         lduw            [%o1], %g2                      /* Load         Group           */
641         add             %o0, 4, %o0                     /* IEU0                         */
642         add             %o1, 4, %o1                     /* IEU0         Group           */
643         stw             %g2, [%o0 - 0x4]                /* Store                        */
            /* Bit 1 of n set: copy one halfword.                             */
644 1:      be,pt           %xcc, 1f                        /* CTI                          */
645          andcc          %o2, 1, %g0                     /* IEU1         Group           */
646         lduh            [%o1], %g2                      /* Load         Group           */
647         add             %o0, 2, %o0                     /* IEU0                         */
648         add             %o1, 2, %o1                     /* IEU0         Group           */
649         sth             %g2, [%o0 - 0x2]                /* Store                        */
            /* Bit 0 of n set: copy the final byte.                           */
650 1:      be,pt           %xcc, 1f                        /* CTI                          */
651          nop                                            /* IEU0         Group           */
652         ldub            [%o1], %g2                      /* Load         Group           */
653         stb             %g2, [%o0]                      /* Store        Group + bubble  */
            /* Return; the delay slot puts the saved %g4 back into %o0.       */
654 1:      retl
655          mov            %g4, %o0
656
/* 212: path taken when (dest - src) & 3 != 0.  dest is first brought to
 * an 8-byte boundary with single-byte copies; after that, aligned
 * doublewords are loaded from src rounded down to 8 bytes, realigned
 * with faligndata and stored to dest 8 bytes at a time.
 * On entry %g2 = dest & 7 (set in the delay slot before the dispatch).
 */
            /* dest already 8-byte aligned: skip the byte loop.  The delay    */
            /* slot is not annulled, so %g1 = 8 on both paths.                */
657 212:    brz,pt          %g2, 2f                         /* CTI          Group           */
658          mov            8, %g1                          /* IEU0                         */
            /* %g2 = 8 - (dest & 7) head bytes; remove them from n.           */
659         sub             %g1, %g2, %g2                   /* IEU0         Group           */
660         sub             %o2, %g2, %o2                   /* IEU0         Group           */
            /* Copy %g2 bytes one at a time.                                  */
661 1:      ldub            [%o1], %g5                      /* Load         Group           */
662         add             %o1, 1, %o1                     /* IEU0                         */
663         add             %o0, 1, %o0                     /* IEU1                         */
664         subcc           %g2, 1, %g2                     /* IEU1         Group           */
665         bne,pt          %xcc, 1b                        /* CTI                          */
666          stb            %g5, [%o0 - 1]                  /* Store                        */
            /* %g5 = bytes to move in 8-byte units, %o2 = 0..7 trailing       */
            /* bytes.                                                         */
667 2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
668         and             %o2, 7, %o2                     /* IEU1                         */
            /* NOTE(review): the reason for this fmovd is not evident from    */
            /* this block (%f2 is not read below) - confirm.                  */
669         fmovd           %f0, %f2                        /* FPU                          */
            /* %g1 = src rounded down to 8 bytes; alignaddr also records      */
            /* the low three bits of src as the offset used by faligndata.    */
670         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
671         ldd             [%g1], %f4                      /* Load         Group           */
            /* Loop unrolled twice, alternating %f4 / %f6 as the older and    */
            /* newer doubleword.  Each half loads the next aligned            */
            /* doubleword, extracts 8 realigned bytes into %f0, stores them   */
            /* and advances %o1, %o0 and %g1 by 8.                            */
672 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
673         add             %g1, 0x8, %g1                   /* IEU0         Group           */
674         subcc           %g5, 8, %g5                     /* IEU1                         */
675         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
676         std             %f0, [%o0]                      /* Store                        */
677         add             %o1, 8, %o1                     /* IEU0         Group           */
            /* Leave from the middle of the loop when the count hits zero.    */
678         be,pn           %xcc, 213f                      /* CTI                          */
679          add            %o0, 8, %o0                     /* IEU1                         */
680         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
681         add             %g1, 8, %g1                     /* IEU0                         */
682         subcc           %g5, 8, %g5                     /* IEU1                         */
683         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
684         std             %f0, [%o0]                      /* Store                        */
685         add             %o1, 8, %o1                     /* IEU0                         */
686         bne,pn          %xcc, 1b                        /* CTI          Group           */
687          add            %o0, 8, %o0                     /* IEU0                         */
            /* Trailing bytes: none left means finish at 214.                 */
688 213:    brz,pn          %o2, 214f                       /* CTI          Group           */
689          nop                                            /* IEU0                         */
            /* Copy one byte here; if more remain, continue in the byte       */
            /* loop at 206, which is defined earlier in the file (outside     */
            /* this excerpt).                                                 */
690         ldub            [%o1], %g5                      /* LOAD                         */
691         add             %o1, 1, %o1                     /* IEU0                         */
692         add             %o0, 1, %o0                     /* IEU1                         */
693         subcc           %o2, 1, %o2                     /* IEU1                         */
694         bne,pt          %xcc, 206b                      /* CTI                          */
695          stb            %g5, [%o0 - 1]                  /* Store        Group           */
            /* Write the constant FPRS_FEF to %fprs, then return with the     */
            /* saved %g4 moved back into %o0 in the delay slot.               */
696 214:    wr              %g0, FPRS_FEF, %fprs
697         retl
698          mov            %g4, %o0
699 END(memcpy)
700 libc_hidden_def(memcpy)
701
/* 228: backward copy one byte at a time, used by memmove when n <= 15.
 * memmove adds n to both pointers before branching here, so %o0 and %o1
 * point one past the end of dest and src.  %o2 = bytes left (non-zero),
 * %g4 = value placed in %o0 on return.
 * 219: return used by memmove for n == 0; %o0 is not modified.
 */
702         .align          32
703 228:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
            /* Even count: go to 2f+4, skipping the load at 2: because the    */
            /* delay slot below (not annulled) has already performed it.      */
704         be,pt           %icc, 2f+4                      /* CTI                          */
705 1:       ldub           [%o1 - 1], %o5                  /* LOAD         Group           */
            /* Odd count: store that one byte; done if it was the only one.   */
706         sub             %o1, 1, %o1                     /* IEU0                         */
707         sub             %o0, 1, %o0                     /* IEU1                         */
708         subcc           %o2, 1, %o2                     /* IEU1         Group           */
709         be,pn           %xcc, 229f                      /* CTI                          */
710          stb            %o5, [%o0]                      /* Store                        */
            /* Two bytes per iteration, highest address first, until %o2      */
            /* becomes zero.                                                  */
711 2:      ldub            [%o1 - 1], %o5                  /* LOAD         Group           */
712         sub             %o0, 2, %o0                     /* IEU0                         */
713         ldub            [%o1 - 2], %g5                  /* LOAD         Group           */
714         sub             %o1, 2, %o1                     /* IEU0                         */
715         subcc           %o2, 2, %o2                     /* IEU1         Group           */
716         stb             %o5, [%o0 + 1]                  /* Store                        */
717         bne,pt          %xcc, 2b                        /* CTI                          */
718          stb            %g5, [%o0]                      /* Store                        */
            /* Return; the delay slot puts the saved %g4 back into %o0.       */
719 229:    retl
720          mov            %g4, %o0
            /* Zero-length return for memmove.                                */
721 219:    retl
722          nop
723
/* memmove: %o0 = dest, %o1 = src, %o2 = n.  The original dest is kept in
 * %g4 for the return value.
 * If (dest - src), compared as unsigned, is >= n the copy is done
 * forward by joining memcpy at 218.  Otherwise both pointers are moved
 * to one past the end of their buffers and the copy runs backward.
 * This part covers the dispatch, the head bytes that bring the end of
 * src down to an 8-byte boundary, and the main loop that moves both
 * pointers back by 128 per iteration.
 */
724         .align          32
725 ENTRY(memmove)
726 #ifndef USE_BPR
            /* Without USE_BPR only the low 32 bits of n are used             */
            /* (a logical shift right by 0 zero-extends the word).            */
727         srl             %o2, 0, %o2                     /* IEU1         Group           */
728 #endif
            /* n == 0: return at 219 with %o0 untouched.                      */
729         brz,pn          %o2, 219b                       /* CTI          Group           */
730          sub            %o0, %o1, %o4                   /* IEU0                         */
            /* (dest - src) >= n unsigned: forward copy via memcpy at 218.    */
            /* The delay slot is not annulled, so %g4 = dest on both paths.   */
731         cmp             %o4, %o2                        /* IEU1         Group           */
732         bgeu,pt         %XCC, 218b                      /* CTI                          */
733          mov            %o0, %g4                        /* IEU0                         */
            /* Backward copy: point %o0 and %o1 one past the end.             */
734         add             %o0, %o2, %o0                   /* IEU0         Group           */
735 220:    add             %o1, %o2, %o1                   /* IEU1                         */
            /* n <= 15: backward byte loop at 228.  The delay slot leaves     */
            /* (end of dest) & 7 in %g2.                                      */
736         cmp             %o2, 15                         /* IEU1         Group           */
737         bleu,pn         %xcc, 228b                      /* CTI                          */
738          andcc          %o0, 7, %g2                     /* IEU1         Group           */
            /* (dest - src) & 3 != 0: take the path at 232, which is          */
            /* defined later in the file (outside this excerpt).              */
739         sub             %o0, %o1, %g5                   /* IEU0                         */
740         andcc           %g5, 3, %o5                     /* IEU1         Group           */
741         bne,pn          %xcc, 232f                      /* CTI                          */
742          andcc          %o1, 3, %g0                     /* IEU1         Group           */
            /* End of src already 4-byte aligned: go to 236.  The annulled    */
            /* delay slot (run only when taken) tests bit 2 for 236.          */
743         be,a,pt         %xcc, 236f                      /* CTI                          */
744          andcc          %o1, 4, %g0                     /* IEU1         Group           */
            /* End of src even: skip the single byte and copy a halfword      */
            /* at 4.  The delay slot tests bit 1; that result is still live   */
            /* at the be below because the sub/ldub/stb in between do not     */
            /* write the condition codes.                                     */
745         andcc           %o1, 1, %g0                     /* IEU1         Group           */
746         be,pn           %xcc, 4f                        /* CTI                          */
747          andcc          %o1, 2, %g0                     /* IEU1         Group           */
            /* End of src odd: copy one byte backward.                        */
748         ldub            [%o1 - 1], %g2                  /* Load         Group           */
749         sub             %o1, 1, %o1                     /* IEU0                         */
750         sub             %o0, 1, %o0                     /* IEU1                         */
751         sub             %o2, 1, %o2                     /* IEU0         Group           */
            /* Bit 1 of the original odd address was clear: the pointer is    */
            /* now 4-byte aligned, so the halfword step is skipped.           */
752         be,pn           %xcc, 5f                        /* CTI          Group           */
753          stb            %g2, [%o0]                      /* Store                        */
            /* Copy one halfword backward.                                    */
754 4:      lduh            [%o1 - 2], %g2                  /* Load         Group           */
755         sub             %o1, 2, %o1                     /* IEU0                         */
756         sub             %o0, 2, %o0                     /* IEU1                         */
757         sub             %o2, 2, %o2                     /* IEU0                         */
758         sth             %g2, [%o0]                      /* Store        Group + bubble  */
            /* The src pointer is 4-byte aligned here; test bit 2.            */
759 5:      andcc           %o1, 4, %g0                     /* IEU1                         */
            /* Bit 2 clear: 8-byte aligned, skip the word copy.               */
            /* %g6 = n & -128 is computed in the annulled delay slot when     */
            /* the branch is taken, or after the word copy otherwise.         */
760 236:    be,a,pn         %xcc, 2f                        /* CTI                          */
761          andcc          %o2, -128, %g6                  /* IEU1         Group           */
            /* Copy one word backward.                                        */
762         lduw            [%o1 - 4], %g5                  /* Load         Group           */
763         sub             %o1, 4, %o1                     /* IEU0                         */
764         sub             %o0, 4, %o0                     /* IEU1                         */
765         sub             %o2, 4, %o2                     /* IEU0         Group           */
766         stw             %g5, [%o0]                      /* Store                        */
767         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* %g6 == 0: no whole 128-byte chunk, go to the tail at 235.      */
768 2:      be,pn           %xcc, 235f                      /* CTI                          */
769          andcc          %o0, 4, %g0                     /* IEU1         Group           */
            /* Bit 2 of the dest pointer clear: use the loop at 282, entered  */
            /* one instruction in.  The delay slot of this branch is the      */
            /* first instruction of the RMOVE_BIGCHUNK expansion at 5.        */
            /* NOTE(review): presumably both macros begin with the same       */
            /* instruction; their bodies are outside this excerpt - confirm.  */
770         be,pn           %xcc, 282f + 4                  /* CTI          Group           */
            /* Main loop: four RMOVE_BIGCHUNK expansions at offsets 0x00,     */
            /* 0x20, 0x40 and 0x60, then both pointers move back by 128.      */
771 5:      RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
772         RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
773         RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
774         RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
            /* Repeat until the 128-byte byte count in %g6 reaches zero.      */
775         subcc           %g6, 128, %g6                   /* IEU1         Group           */
776         sub             %o1, 128, %o1                   /* IEU0                         */
777         bne,pt          %xcc, 5b                        /* CTI                          */
778          sub            %o0, 128, %o0                   /* IEU0         Group           */
/* 235: tail of the backward copy for the remaining n & 0x7f bytes.
 * Bits 4-6 of n are handled by a computed jump into the
 * RMOVE_LASTCHUNK sequence; bits 3, 2, 1 and 0 are then handled one at
 * a time, always reading and writing just below the current pointers.
 * The 8-byte step stores its data as two 32-bit words.
 */
            /* %g6 = n & 0x70; zero means no 16-byte pieces, go to 280.       */
            /* The delay slot tests bit 3 of n for the branch at 280.         */
779 235:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
780 41:     be,pn           %xcc, 280f                      /* CTI                          */
781          andcc          %o2, 8, %g0                     /* IEU1         Group           */
782                                                         /* Clk1 8-(                     */
783                                                         /* Clk2 8-(                     */
784                                                         /* Clk3 8-(                     */
785                                                         /* Clk4 8-(                     */
            /* Move src and dest back by %g6, then jump to                    */
            /* (address of 280) - 2 * %g6, so that only the last %g6 / 16     */
            /* expansions below are executed.                                 */
            /* NOTE(review): this assumes each RMOVE_LASTCHUNK expansion is   */
            /* 32 bytes of code per 16 bytes copied and leaves the condition  */
            /* codes alone; the macro body is outside this excerpt - confirm. */
786 279:    rd              %pc, %o5                        /* PDU          Group           */
787         sll             %g6, 1, %g5                     /* IEU0         Group           */
788         sub             %o1, %g6, %o1                   /* IEU1                         */
789         sub             %o5, %g5, %o5                   /* IEU0         Group           */
790         jmpl            %o5 + %lo(280f - 279b), %g0     /* CTI          Group brk forced*/
791          sub            %o0, %g6, %o0                   /* IEU0         Group           */
792         RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
793         RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
794         RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
795         RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
796         RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
797         RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
798         RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
            /* Bit 3 of n clear: skip the 8-byte step.  Each delay slot in    */
            /* this chain tests the next lower bit of n for the next branch.  */
799 280:    be,pt           %xcc, 281f                      /* CTI                          */
800          andcc          %o2, 4, %g0                     /* IEU1                         */
            /* Copy 8 bytes: one 64-bit load, stored as the low word at       */
            /* [%o0 + 4] and, after the shift, the high word at [%o0].        */
801         ldx             [%o1 - 8], %g2                  /* Load         Group           */
802         sub             %o0, 8, %o0                     /* IEU0                         */
803         stw             %g2, [%o0 + 4]                  /* Store        Group           */
804         sub             %o1, 8, %o1                     /* IEU1                         */
805         srlx            %g2, 32, %g2                    /* IEU0         Group           */
806         stw             %g2, [%o0]                      /* Store                        */
            /* Bit 2 of n set: copy one word.                                 */
807 281:    be,pt           %xcc, 1f                        /* CTI                          */
808          andcc          %o2, 2, %g0                     /* IEU1         Group           */
809         lduw            [%o1 - 4], %g2                  /* Load         Group           */
810         sub             %o1, 4, %o1                     /* IEU0                         */
811         stw             %g2, [%o0 - 4]                  /* Store        Group           */
812         sub             %o0, 4, %o0                     /* IEU0                         */
            /* Bit 1 of n set: copy one halfword.                             */
813 1:      be,pt           %xcc, 1f                        /* CTI                          */
814          andcc          %o2, 1, %g0                     /* IEU1         Group           */
815         lduh            [%o1 - 2], %g2                  /* Load         Group           */
816         sub             %o1, 2, %o1                     /* IEU0                         */
817         sth             %g2, [%o0 - 2]                  /* Store        Group           */
818         sub             %o0, 2, %o0                     /* IEU0                         */
            /* Bit 0 of n set: copy the final byte.                           */
819 1:      be,pt           %xcc, 211f                      /* CTI                          */
820          nop                                            /* IEU1                         */
821         ldub            [%o1 - 1], %g2                  /* Load         Group           */
822         stb             %g2, [%o0 - 1]                  /* Store        Group + bubble  */
            /* Return; the delay slot puts the saved %g4 back into %o0.       */
823 211:    retl
824          mov            %g4, %o0
825
826 282:    RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
827         RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
828         subcc           %g6, 128, %g6                   /* IEU1         Group           */
829         sub             %o1, 128, %o1                   /* IEU0                         */
830         bne,pt          %xcc, 282b                      /* CTI                          */
831          sub            %o0, 128, %o0                   /* IEU0         Group           */
832         andcc           %o2, 0x70, %g6                  /* IEU1                         */
833         be,pn           %xcc, 284f                      /* CTI                          */
834          andcc          %o2, 8, %g0                     /* IEU1         Group           */
835                                                         /* Clk1 8-(                     */
836                                                         /* Clk2 8-(                     */
837                                                         /* Clk3 8-(                     */
838                                                         /* Clk4 8-(                     */
839 283:    rd              %pc, %o5                        /* PDU          Group           */
840         sub             %o1, %g6, %o1                   /* IEU0         Group           */
841         sub             %o5, %g6, %o5                   /* IEU1                         */
842         jmpl            %o5 + %lo(284f - 283b), %g0     /* CTI          Group brk forced*/
843          sub            %o0, %g6, %o0                   /* IEU0         Group           */
844         RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
845         RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
846         RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
847         RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
848         RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
849         RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
850         RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
851 284:    be,pt           %xcc, 285f                      /* CTI          Group           */
852          andcc          %o2, 4, %g0                     /* IEU1                         */
853         ldx             [%o1 - 8], %g2                  /* Load         Group           */
854         sub             %o0, 8, %o0                     /* IEU0                         */
855         sub             %o1, 8, %o1                     /* IEU0         Group           */
856         stx             %g2, [%o0]                      /* Store                        */
857 285:    be,pt           %xcc, 1f                        /* CTI                          */
858          andcc          %o2, 2, %g0                     /* IEU1         Group           */
859         lduw            [%o1 - 4], %g2                  /* Load         Group           */
860         sub             %o0, 4, %o0                     /* IEU0                         */
861         sub             %o1, 4, %o1                     /* IEU0         Group           */
862         stw             %g2, [%o0]                      /* Store                        */
863 1:      be,pt           %xcc, 1f                        /* CTI                          */
864          andcc          %o2, 1, %g0                     /* IEU1         Group           */
865         lduh            [%o1 - 2], %g2                  /* Load         Group           */
866         sub             %o0, 2, %o0                     /* IEU0                         */
867         sub             %o1, 2, %o1                     /* IEU0         Group           */
868         sth             %g2, [%o0]                      /* Store                        */
869 1:      be,pt           %xcc, 1f                        /* CTI                          */
870          nop                                            /* IEU0         Group           */
871         ldub            [%o1 - 1], %g2                  /* Load         Group           */
872         stb             %g2, [%o0 - 1]                  /* Store        Group + bubble  */
873 1:      retl
874          mov            %g4, %o0
875
876 232:    brz,pt          %g2, 2f                         /* CTI          Group           */
877          sub            %o2, %g2, %o2                   /* IEU0         Group           */
878 1:      ldub            [%o1 - 1], %g5                  /* Load         Group           */
879         sub             %o1, 1, %o1                     /* IEU0                         */
880         sub             %o0, 1, %o0                     /* IEU1                         */
881         subcc           %g2, 1, %g2                     /* IEU1         Group           */
882         bne,pt          %xcc, 1b                        /* CTI                          */
883          stb            %g5, [%o0]                      /* Store                        */
884 2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
885         and             %o2, 7, %o2                     /* IEU1                         */
886         fmovd           %f0, %f2                        /* FPU                          */
887         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
888         ldd             [%g1], %f4                      /* Load         Group           */
889 1:      ldd             [%g1 - 8], %f6                  /* Load         Group           */
890         sub             %g1, 8, %g1                     /* IEU0         Group           */
891         subcc           %g5, 8, %g5                     /* IEU1                         */
892         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
893         std             %f0, [%o0 - 8]                  /* Store                        */
894         sub             %o1, 8, %o1                     /* IEU0         Group           */
895         be,pn           %xcc, 233f                      /* CTI                          */
896          sub            %o0, 8, %o0                     /* IEU1                         */
897         ldd             [%g1 - 8], %f4                  /* Load         Group           */
898         sub             %g1, 8, %g1                     /* IEU0                         */
899         subcc           %g5, 8, %g5                     /* IEU1                         */
900         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
901         std             %f0, [%o0 - 8]                  /* Store                        */
902         sub             %o1, 8, %o1                     /* IEU0                         */
903         bne,pn          %xcc, 1b                        /* CTI          Group           */
904          sub            %o0, 8, %o0                     /* IEU0                         */
905 233:    brz,pn          %o2, 234f                       /* CTI          Group           */
906          nop                                            /* IEU0                         */
907 237:    ldub            [%o1 - 1], %g5                  /* LOAD                         */
908         sub             %o1, 1, %o1                     /* IEU0                         */
909         sub             %o0, 1, %o0                     /* IEU1                         */
910         subcc           %o2, 1, %o2                     /* IEU1                         */
911         bne,pt          %xcc, 237b                      /* CTI                          */
912          stb            %g5, [%o0]                      /* Store        Group           */
913 234:    wr              %g0, FPRS_FEF, %fprs
914         retl
915          mov            %g4, %o0
916 END(memmove)
917 libc_hidden_def(memmove)
918
919 #ifdef USE_BPR
/* USE_BPR is set near the top of this file in the branch taken when XCC
   was not already defined.  In that configuration memcpy is additionally
   published under the names __align_cpy_1 and __align_cpy_2.
   NOTE(review): weak_alias is defined outside this file; presumably it
   emits a weak symbol aliasing its first argument -- confirm against the
   macro's definition.  */
920 weak_alias(memcpy,__align_cpy_1)
921 weak_alias(memcpy,__align_cpy_2)
922 #endif /* USE_BPR */