2 * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
4 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
21 #include "include/bits/uClibc_ctype.h"
23 /* TODO: maybe support -v like gen_wctype.c */
/*
 * Print a printf-style diagnostic to stderr, but only when `verbose` is
 * nonzero.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as one
 * statement: with a bare `if (verbose) ...` expansion, a call written as
 *     if (cond) verbose_msg(...); else other();
 * would attach the `else` to the macro's hidden `if` instead of to `cond`.
 */
#define verbose_msg(msg...) do { if (verbose) fprintf(stderr, msg); } while (0)
/*
 * Table geometry for the three per-codeset tables (ctype, uplow, c2wc).
 *
 * main() walks the character codes 128..255 of each codeset and groups them
 * into fixed-size rows. For every table:
 *   *_IDX_SHIFT  log2 of the row length;
 *   *_ROW_LEN    entries per row            (1 << shift);
 *   *_IDX_LEN    index entries per codeset  (128 >> shift),
 * so *_IDX_LEN * *_ROW_LEN is always 128.
 *
 * CTYPE_PACKED is left undefined here. The packed alternatives below are
 * kept only as commented-out reference (main() contains an emitter that
 * stores two ctype entries per byte for that case).
 */
26 /* #define CTYPE_PACKED */
27 #define UPLOW_IDX_SHIFT 3
28 /* best if 2 unpacked or 3 packed */
29 #define CTYPE_IDX_SHIFT 3
30 /* 3 or 4 are very similar */
31 #define C2WC_IDX_SHIFT 3
/* Number of row-index entries each codeset carries for the matching table. */
33 #define CTYPE_IDX_LEN (128 >> (CTYPE_IDX_SHIFT))
34 #define UPLOW_IDX_LEN (128 >> (UPLOW_IDX_SHIFT))
35 #define C2WC_IDX_LEN (128 >> (C2WC_IDX_SHIFT))
/* Number of entries in one row of the matching table. */
37 /* #ifdef CTYPE_PACKED */
38 /* #define CTYPE_ROW_LEN (1 << ((CTYPE_IDX_SHIFT)-1)) */
40 #define CTYPE_ROW_LEN (1 << (CTYPE_IDX_SHIFT))
42 #define UPLOW_ROW_LEN (1 << (UPLOW_IDX_SHIFT))
43 #define C2WC_ROW_LEN (1 << (C2WC_IDX_SHIFT))
/*
 * Largest wide-character value a codeset mapping may contain; main() rejects
 * any input line whose wide-character value exceeds it.
 */
47 #define MAX_WCHAR (0x2600-1)
/*
 * Row pools sized for 256 rows each. main() gives up with an error as soon
 * as one of the row counters below would exceed 256.
 * NOTE(review): only c2wc_tbl is visibly used as the dedup pool in this
 * excerpt; ctype_tbl and uplow_tbl presumably play the same role -- confirm.
 */
49 static unsigned char ctype_tbl[256 * CTYPE_ROW_LEN];
50 static unsigned char uplow_tbl[256 * UPLOW_ROW_LEN];
52 static unsigned short c2wc_tbl[256 * C2WC_ROW_LEN];
/*
 * Working storage for the wide-char -> 8-bit (wc2c) lookup:
 *   tt  distinct (1 << TT_SHIFT)-byte blocks taken from a codeset's w2c map;
 *   xi  for each block position, the tt block number chosen for it
 *       (cleared by main() before every codeset);
 *   ti  distinct (1 << TI_SHIFT)-byte blocks (presumably blocks of xi --
 *       TODO confirm, the assignment is outside this excerpt).
 * ti and tt are later emitted as the "ti_table" and "tt_table" parts of
 * __LOCALE_DATA_Cwc2c_data.
 */
54 static unsigned char tt[MAX_WCHAR+1];
55 static unsigned char ti[MAX_WCHAR+1];
56 static unsigned char xi[MAX_WCHAR+1];
/*
 * Number of distinct rows accumulated so far for each table. main() uses
 * them as the search bound when looking for an existing identical row and
 * to size the emitted __LOCALE_DATA_C*_data arrays.
 */
58 static int n_ctype_rows;
59 static int n_uplow_rows;
61 static int n_c2wc_rows;
/* Emitted by main() as __LOCALE_DATA_Cwc2c_DOMAIN_MAX. */
66 #define RANGE MAX_WCHAR
/*
 * Length of the per-codeset top-level wc2c index (the `ii` member and the
 * emitted idx8wc2c array). TT_SHIFT and TI_SHIFT are defined elsewhere in
 * this file.
 */
71 #define II_LEN ((MAX_WCHAR+1) >> (TT_SHIFT+TI_SHIFT))
74 unsigned long c2w[256];
75 unsigned char w2c[MAX_WCHAR];
76 unsigned char ii[II_LEN];
77 unsigned char ctype_idx[CTYPE_IDX_LEN];
78 unsigned char uplow_idx[UPLOW_IDX_LEN];
79 unsigned char c2wc_idx[C2WC_IDX_LEN];
82 int main(int argc, char **argv)
86 unsigned long max_wchar;
92 unsigned char row[256];
94 unsigned short wrow[256];
96 char codeset_list[500];
97 char codeset_index[30];
98 int codeset_list_end = 0;
101 if (!setlocale(LC_CTYPE, "en_US.UTF-8")) {
102 /* Silly foreigners disabling en_US locales */
103 FILE *fp = popen("locale -a", "r");
111 if (fgets(buf, sizeof(buf) - 10, fp) == NULL)
115 if (len > 0 && buf[len - 1] == '\n')
117 if (len < 5 || strcasecmp(&buf[len-5], ".UTF8") != 0)
118 strcat(buf, ".UTF8");
119 if (setlocale(LC_CTYPE, buf))
124 printf("could not find a UTF8 locale ... please enable en_US.UTF-8\n");
132 /* User requested 8-bit codesets, but didn't list any... */
133 /* Allow to build, just so this feature can be left on in config. */
134 printf("#ifdef __CTYPE_HAS_8_BIT_LOCALES\n");
135 printf("#warning ignoring 8 bit codesets request"
136 " as no codesets specified.\n");
138 printf("#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
140 printf("#define __LOCALE_DATA_NUM_CODESETS\t\t0\n");
141 printf("#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n");
145 /* printf("#define __CTYPE_HAS_8_BIT_LOCALES\t1\n\n"); */
146 printf("#ifdef __CTYPE_HAS_8_BIT_LOCALES\n\n");
150 printf("#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
152 printf("#define __LOCALE_DATA_NUM_CODESETS\t\t0\n");
153 printf("#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n");
155 printf("#define __CTYPE_HAS_8_BIT_LOCALES\t\t1\n\n");
158 printf("#define __LOCALE_DATA_Cctype_IDX_SHIFT\t%d\n", CTYPE_IDX_SHIFT);
159 printf("#define __LOCALE_DATA_Cctype_IDX_LEN\t\t%d\n", CTYPE_IDX_LEN);
161 printf("#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN >> 1);
162 printf("#define __LOCALE_DATA_Cctype_PACKED\t\t1\n");
164 printf("#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN);
165 printf("#undef __LOCALE_DATA_Cctype_PACKED\n");
168 printf("\n#define __LOCALE_DATA_Cuplow_IDX_SHIFT\t%d\n", UPLOW_IDX_SHIFT);
169 printf("#define __LOCALE_DATA_Cuplow_IDX_LEN\t\t%d\n", UPLOW_IDX_LEN);
170 printf("#define __LOCALE_DATA_Cuplow_ROW_LEN\t\t%d\n", UPLOW_ROW_LEN);
173 printf("\n#define __LOCALE_DATA_Cc2wc_IDX_LEN\t\t%d\n", C2WC_IDX_LEN);
174 printf("#define __LOCALE_DATA_Cc2wc_IDX_SHIFT\t\t%d\n", C2WC_IDX_SHIFT);
175 printf("#define __LOCALE_DATA_Cc2wc_ROW_LEN\t\t%d\n", C2WC_ROW_LEN);
178 printf("\ntypedef struct {\n");
179 printf("\tunsigned char idx8ctype[%d];\n", CTYPE_IDX_LEN);
180 printf("\tunsigned char idx8uplow[%d];\n", UPLOW_IDX_LEN);
182 printf("\tunsigned char idx8c2wc[%d];\n", C2WC_IDX_LEN);
183 printf("\tunsigned char idx8wc2c[%d];\n", II_LEN);
186 printf("} __codeset_8_bit_t;\n\n");
188 printf("} __attribute__((__packed__)) __codeset_8_bit_t;\n\n");
189 #endif /* __metag__ */
191 printf("#ifdef WANT_DATA\n\n");
192 printf("static const __codeset_8_bit_t codeset_8_bit[%d] = {\n", argc-1);
196 codeset_index[0] = 0;
198 if (!(fp = fopen(*++argv,"r"))) {
199 fprintf(stderr, "cannot open file \"%s\"\n", *argv);
202 fprintf(stderr, "processing %s... ", *argv);
209 s0 = strrchr(*argv, '/');
215 s1 = strrchr(s0, '.');
222 /* if ((numsets == 0) && strncmp("ASCII", s0, n)) { */
223 /* printf("error - first codeset isn't ASCII!\n"); */
224 /* return EXIT_FAILURE; */
227 if (numsets >= sizeof(codeset_index)) {
228 fprintf(stderr, "error - too many codesets!\n");
232 if (codeset_list_end + n + 1 + numsets + 1 + 1 >= 256) {
233 fprintf(stderr, "error - codeset list to big!\n");
237 codeset_index[numsets+1] = codeset_index[numsets] + n+1;
238 strncpy(codeset_list + codeset_list_end, s0, n);
239 codeset_list_end += (n+1);
240 codeset_list[codeset_list_end - 1] = 0;
242 printf("\t{ /* %.*s */", n, s0);
245 memset(&csd[numsets], 0, sizeof(charset_data));
246 memset(xi, 0, sizeof(xi));
251 while (fgets(buf,sizeof(buf),fp)) {
252 if ((2 != sscanf(buf, "{ %lx , %lx", &c, &wc))
253 || (c >= 256) || (wc > MAX_WCHAR)) {
254 fprintf(stderr, "error: scanf failure! \"%s\"\n", buf);
258 /* don't put in w2c... dynamically build tt instead. */
260 if (c <= 0x7f) { /* check the 7bit entries but don't store */
262 fprintf(stderr, "error: c != wc in %s\n", buf);
265 csd[numsets].c2w[c] = wc;
266 csd[numsets].w2c[wc] = 0; /* ignore */
267 if (wc > max_wchar) {
271 csd[numsets].c2w[c] = wc;
272 csd[numsets].w2c[wc] = c;
273 if (wc > max_wchar) {
279 fprintf(stderr, "%d lines ", lines);
281 for (i = 0 ; i <= MAX_WCHAR ; i += (1 << TT_SHIFT)) {
282 p = &csd[numsets].w2c[i];
283 for (j = 0 ; j < tt_num ; j++) {
284 if (!memcmp(p, &tt[j << TT_SHIFT], (1 << TT_SHIFT))) {
288 if (j == tt_num) { /* new entry */
289 memcpy(&tt[j << TT_SHIFT], p, (1 << TT_SHIFT));
292 xi[i >> TT_SHIFT] = j;
295 for (i = 0 ; i <= (MAX_WCHAR >> TT_SHIFT) ; i += (1 << TI_SHIFT)) {
297 for (j = 0 ; j < ti_num ; j++) {
298 if (!memcmp(p, &ti[j << TI_SHIFT], (1 << TI_SHIFT))) {
302 if (j == ti_num) { /* new entry */
303 memcpy(&ti[j << TI_SHIFT], p, (1 << TI_SHIFT));
306 csd[numsets].ii[i >> TI_SHIFT] = j;
307 /* fprintf(stderr, "%d ", i >> TI_SHIFT); */
311 printf("\n\t\t/* idx8ctype data */\n\t\t{");
312 for (i = 128 ; i < 256 ; i++) {
316 /* if (!(i & 0x7)) { */
320 c = csd[numsets].c2w[i];
322 if (c == 0) { /* non-existant char in codeset */
323 d = __CTYPE_unclassified;
324 } else if (iswdigit(c)) {
326 } else if (iswalpha(c)) {
327 d = __CTYPE_alpha_nonupper_nonlower;
329 d = __CTYPE_alpha_lower;
331 d = __CTYPE_alpha_upper_lower;
333 } else if (iswupper(c)) {
334 d = __CTYPE_alpha_upper;
336 } else if (iswpunct(c)) {
338 } else if (iswgraph(c)) {
340 } else if (iswprint(c)) {
341 d = __CTYPE_print_space_nonblank;
343 d = __CTYPE_print_space_blank;
345 } else if (iswspace(c) && !iswcntrl(c)) {
346 d = __CTYPE_space_nonblank_noncntrl;
348 d = __CTYPE_space_blank_noncntrl;
350 } else if (iswcntrl(c)) {
351 d = __CTYPE_cntrl_nonspace;
353 d = __CTYPE_cntrl_space_nonblank;
355 d = __CTYPE_cntrl_space_blank;
359 d = __CTYPE_unclassified;
363 row[i & (CTYPE_ROW_LEN-1)] = d;
364 if ((i & (CTYPE_ROW_LEN-1)) == (CTYPE_ROW_LEN-1)) {
366 for (j=0 ; j < n_ctype_rows ; j++) {
367 if (!memcmp(p, row, CTYPE_ROW_LEN)) {
372 if (j == n_ctype_rows) { /* new entry */
373 if (++n_ctype_rows > 256) {
374 fprintf(stderr, "error -- to many ctype rows!\n");
377 memcpy(p, row, CTYPE_ROW_LEN);
379 csd[numsets].ctype_idx[i >> CTYPE_IDX_SHIFT] = j;
380 if (!((i >> CTYPE_IDX_SHIFT) & 0x7)
381 && (i != (127 + CTYPE_ROW_LEN))
395 printf(",\n\t\t/* idx8uplow data */\n\t\t{");
396 for (i = 128 ; i < 256 ; i++) {
398 /* if (!(i & 0x7)) { */
401 c = csd[numsets].c2w[i];
406 if (u >= 0x80) u = csd[numsets].w2c[u];
407 if (l >= 0x80) l = csd[numsets].w2c[l];
409 if (u == 0) u = i; /* upper is missing, so ignore */
410 if (l == 0) l = i; /* lower is missing, so ignore */
413 /* store as unsigned char and let overflow handle it. */
414 /* if ((((u-i) < CHAR_MIN) || ((u-i) > CHAR_MAX)) */
415 /* || (((i-l) < CHAR_MIN) || ((i-l) > CHAR_MAX)) */
417 /* fprintf(stderr, "error - uplow diff out of range! %d %ld %ld\n", */
419 /* return EXIT_FAILURE; */
422 row[i & (UPLOW_ROW_LEN-1)] = ((l==i) ? (u-i) : (i-l));
423 if ((i & (UPLOW_ROW_LEN-1)) == (UPLOW_ROW_LEN-1)) {
425 for (j=0 ; j < n_uplow_rows ; j++) {
426 if (!memcmp(p, row, UPLOW_ROW_LEN)) {
431 if (j == n_uplow_rows) { /* new entry */
432 if (++n_uplow_rows > 256) {
433 fprintf(stderr, "error -- to many uplow rows!\n");
436 memcpy(p, row, UPLOW_ROW_LEN);
438 csd[numsets].uplow_idx[i >> UPLOW_IDX_SHIFT] = j;
439 if (!((i >> UPLOW_IDX_SHIFT) & 0x7)
440 && (i != (127 + UPLOW_ROW_LEN))
448 if (!(i & 0x7) && i) {
451 printf(" %4ld,", (l==i) ? (u-i) : (i-l));
452 /* printf(" %4ld,", (l==i) ? u : l); */
454 if ((u != i) || (l != i)) {
456 printf(" %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, \n",
460 (unsigned long) towlower(c),
462 (unsigned long) towupper(c));
465 printf(" %#08lx, %8ld, %d, %8ld, %d, %#08lx\n",
482 #else /* DO_WIDE_CHAR */
485 printf(",\n\t\t/* idx8c2wc data */\n\t\t{");
486 for (i = 128 ; i < 256 ; i++) {
488 wrow[i & (C2WC_ROW_LEN-1)] = csd[numsets].c2w[i];
489 if ((i & (C2WC_ROW_LEN-1)) == (C2WC_ROW_LEN-1)) {
490 p = (unsigned char *) c2wc_tbl;
491 for (j=0 ; j < n_c2wc_rows ; j++) {
492 if (!memcmp(p, (char *) wrow, 2*C2WC_ROW_LEN)) {
497 if (j == n_c2wc_rows) { /* new entry */
498 if (++n_c2wc_rows > 256) {
499 fprintf(stderr, "error -- to many c2wc rows!\n");
502 memcpy(p, (char *) wrow, 2*C2WC_ROW_LEN);
504 csd[numsets].c2wc_idx[i >> C2WC_IDX_SHIFT] = j;
505 if (!((i >> C2WC_IDX_SHIFT) & 0x7)
506 && (i != (127 + C2WC_ROW_LEN))
513 if (!(i & 0x7) && i) {
516 printf(" %#6lx,", csd[numsets].c2w[i]);
523 /* fprintf(stderr, "\nII_LEN = %d\n", II_LEN); */
524 printf("\t\t/* idx8wc2c data */\n\t\t{");
525 for (i = 0 ; i < II_LEN ; i++) {
526 if (!(i & 0x7) && i) {
529 printf(" %#4x,", csd[numsets].ii[i]);
534 #endif /* DO_WIDE_CHAR */
539 fprintf(stderr, "done\n");
542 printf("\n#endif /* WANT_DATA */\n");
546 printf("#define __LOCALE_DATA_Cwc2c_DOMAIN_MAX\t%#x\n", RANGE);
547 printf("#define __LOCALE_DATA_Cwc2c_TI_SHIFT\t\t%d\n", TI_SHIFT);
548 printf("#define __LOCALE_DATA_Cwc2c_TT_SHIFT\t\t%d\n", TT_SHIFT);
549 printf("#define __LOCALE_DATA_Cwc2c_II_LEN\t\t%d\n", II_LEN);
550 printf("#define __LOCALE_DATA_Cwc2c_TI_LEN\t\t%d\n", ti_num << TI_SHIFT);
551 printf("#define __LOCALE_DATA_Cwc2c_TT_LEN\t\t%d\n", tt_num << TT_SHIFT);
554 printf("\n#define __LOCALE_DATA_Cwc2c_TBL_LEN\t\t%d\n",
555 (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT));
557 printf("#ifdef WANT_DATA\n\n");
558 printf("static const unsigned char __LOCALE_DATA_Cwc2c_data[%d] = {\n",
559 (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT));
560 printf("\t/* ti_table */\n\t");
561 for (i=0 ; i < ti_num << TI_SHIFT ; i++) {
565 printf(" %#4x,", ti[i]);
568 printf("\t/* tt_table */\n\t");
569 for (i=0 ; i < tt_num << TT_SHIFT ; i++) {
573 printf(" %#4x,", tt[i]);
577 printf("\n#endif /* WANT_DATA */\n");
578 #endif /* DO_WIDE_CHAR */
580 printf("\n#define __LOCALE_DATA_Cuplow_TBL_LEN\t\t%d\n",
581 n_uplow_rows * UPLOW_ROW_LEN);
582 printf("\n#ifdef WANT_DATA\n\n");
584 printf("\nstatic const unsigned char __LOCALE_DATA_Cuplow_data[%d] = {\n",
585 n_uplow_rows * UPLOW_ROW_LEN);
587 for (j=0 ; j < n_uplow_rows ; j++) {
589 for (i=0 ; i < UPLOW_ROW_LEN ; i++) {
590 printf(" %#4x,", (unsigned int)((unsigned char) p[i]));
597 printf("\n#endif /* WANT_DATA */\n");
598 printf("\n#define __LOCALE_DATA_Cctype_TBL_LEN\t\t%d\n",
600 n_ctype_rows * CTYPE_ROW_LEN / 2
602 n_ctype_rows * CTYPE_ROW_LEN
605 printf("\n#ifdef WANT_DATA\n\n");
608 printf("\nstatic const unsigned char __LOCALE_DATA_Cctype_data[%d] = {\n",
610 n_ctype_rows * CTYPE_ROW_LEN / 2
612 n_ctype_rows * CTYPE_ROW_LEN
616 for (j=0 ; j < n_ctype_rows ; j++) {
618 for (i=0 ; i < CTYPE_ROW_LEN ; i++) {
620 printf(" %#4x,", (unsigned int)(p[i] + (p[i+1] << 4)));
623 printf(" %#4x,", (unsigned int)p[i]);
631 printf("\n#endif /* WANT_DATA */\n");
635 printf("\n#define __LOCALE_DATA_Cc2wc_TBL_LEN\t\t%d\n",
636 n_c2wc_rows * C2WC_ROW_LEN);
637 printf("\n#ifdef WANT_DATA\n\n");
639 printf("\nstatic const unsigned short __LOCALE_DATA_Cc2wc_data[%d] = {\n",
640 n_c2wc_rows * C2WC_ROW_LEN);
641 p = (unsigned char *) c2wc_tbl;
642 for (j=0 ; j < n_c2wc_rows ; j++) {
644 for (i=0 ; i < C2WC_ROW_LEN ; i++) {
645 printf(" %#6x,", (unsigned int)(((unsigned short *)p)[i]));
651 printf("\n#endif /* WANT_DATA */\n");
652 #endif /* DO_WIDE_CHAR */
655 printf("#define __LOCALE_DATA_NUM_CODESETS\t\t%d\n", numsets);
656 printf("#define __LOCALE_DATA_CODESET_LIST \\\n\t\"");
657 for (i=0 ; i < numsets ; i++) {
658 printf("\\x%02x", numsets + 1 + (unsigned char) codeset_index[i]);
659 if (((i & 7) == 7) && (i + 1 < numsets)) {
660 printf("\" \\\n\t\"");
663 printf("\" \\\n\t\"\\0\"");
664 for (i=0 ; i < numsets ; i++) {
665 printf(" \\\n\t\"%s\\0\"",
666 codeset_list + ((unsigned char)codeset_index[i]));
670 for (i=0 ; i < numsets ; i++) {
673 strcpy(buf, codeset_list + ((unsigned char)codeset_index[i]));
674 for (z=buf ; *z ; z++) {
679 printf("#define __CTYPE_HAS_CODESET_%s\n", buf);
682 printf("#define __CTYPE_HAS_CODESET_UTF_8\n");
683 #endif /* DO_WIDE_CHAR */
686 printf("\n#endif /* __CTYPE_HAS_8_BIT_LOCALES */\n\n");
691 fprintf(stderr, "tt_num = %d ti_num = %d\n", tt_num, ti_num);
692 fprintf(stderr, "max_wchar = %#lx\n", max_wchar);
694 fprintf(stderr, "size is %d * %d + %d * %d + %d * %d = %d\n",
695 tt_num, 1 << TT_SHIFT, ti_num, 1 << TI_SHIFT,
696 ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1), numsets,
697 j = tt_num * (1 << TT_SHIFT) + ti_num * (1 << TI_SHIFT)
698 + ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1) * numsets);
700 #endif /* DO_WIDE_CHAR */
708 fprintf(stderr, "ctype - CTYPE_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
709 CTYPE_IDX_SHIFT, numsets, CTYPE_IDX_LEN, n_ctype_rows, CTYPE_ROW_LEN / i,
710 j = numsets * CTYPE_IDX_LEN + n_ctype_rows * CTYPE_ROW_LEN / i);
713 fprintf(stderr, "uplow - UPLOW_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
714 UPLOW_IDX_SHIFT, numsets, UPLOW_IDX_LEN, n_uplow_rows, UPLOW_ROW_LEN,
715 j = numsets * UPLOW_IDX_LEN + n_uplow_rows * UPLOW_ROW_LEN);
720 fprintf(stderr, "c2wc - C2WC_IDX_SHIFT = %d -- %d * %d + 2 * %d * %d = %d\n",
721 C2WC_IDX_SHIFT, numsets, C2WC_IDX_LEN, n_c2wc_rows, C2WC_ROW_LEN,
722 j = numsets * C2WC_IDX_LEN + 2 * n_c2wc_rows * C2WC_ROW_LEN);
725 #endif /* DO_WIDE_CHAR */
727 fprintf(stderr, "total size = %d\n", total_size);
729 /* for (i=0 ; i < numsets ; i++) { */
730 /* printf("codeset_index[i] = %d codeset_list[ci[i]] = \"%s\"\n", */
731 /* (unsigned char) codeset_index[i], */
732 /* codeset_list + ((unsigned char)codeset_index[i])); */