/* File : loaddict.c Author : Richard A. O'Keefe Updated: 06/27/01 Purpose: Read a WBXML dictionary in textual form and save it out as C source code for use with wbxml.c. */ #ifndef lint static char SCCSid[] = "@(#)01/06/27 loaddict.c 1.2"; #endif/*lint*/ #include #include #include #include "wbxml.h" /* The input MUST be encoded in ASCII or UTF-8; use iconv or tc to convert other encodings. ::= ::= + , , ::= * ::= * ::= + If the number of entries in a is C and in an is P, then 0 <= C, 0 <= P C+P <= 59 for C+P <= 123 for and ::= should be C identifier ::= should be SGML public id ::= ( | )+ ::= ::= '\r' ['\n'] | '\n' ::= \\ backslash | \# octothorpe | \t tab | \r return | \n linefeed | \### three octal digits | \hHH two hex digits | \uHHHH four hex digits | \Hhhhhhh six hex digits | \Uhhhhhhhh eight hex digits | \ continuation ::= any character except \\ \r or \n. ::= '#' ( | \\)* Comment lines are allowed anywhere and are completely ignored. If a string must begin with #, then that # must be escaped with a backslash. Other # characters do not need such protection. Any mix of UNIX (\n), Macintosh (\r), and DOS (\r\n) line terminators may be used. Files are regarded as ending with lots of ; they do not have to be explicitly present. */ /* All text is placed in a single array. 20 bytes/string * 2000 strings = 40kb; the size of the array is arbitrarily set at 63kb to make this program portable (C99 sizes, not C89 sizes). */ #define CHAR_TABLE_SIZE (63 * 1024) static unsigned char char_table[CHAR_TABLE_SIZE] = {'*','\0'}; static int ctp = 2; /* start with an empty string */ static int nl_check(FILE *stream) { int c = getc(stream); if (c >= 0 && c != '\n') ungetc(c, stream); return 1; } /* All input is done using one function to read a . It assumes that the source stream is in binary mode, but if it is in text mode no harm will result. For most strings we want one NUL at the end; for attributes we want two. When we check for '=', we'll strip one off if we find an '='. */ unsigned char *get_string(FILE *stream, int nuls) { int c, d; /* unsigned char U {EOF} */ int n; /* 0..8 */ int p = ctp; /* if we fail out, ctp is unchanged */ for (;;) { c = getc(stream); if (c != '#') break; for (;;) { c = getc(stream); if (c < 0) return char_table+1; if (c == '\n' || (c == '\r' && nl_check(stream))) break; } } if (c < 0) return char_table+1; for (;;) { if (c == '\n' || (c == '\r' && nl_check(stream))) break; if (c == '\\') { c = getc(stream); switch (c) { case '\n': continue; case '\r': (void)nl_check(stream); continue; case 't': c = '\t'; break; case 'r': c = '\r'; break; case 'n': c = '\n'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c = c & 7; d = getc(stream); if (!('0' <= d && d <= '7')) { ungetc(d, stream); } else { c = (c << 3) | (d & 7); d = getc(stream); if (!('0' <= d && d <= '7')) { ungetc(d, stream); } else { c = (c << 3) | (d & 7); } } break; case 'h': n = 2; goto hex; case 'u': n = 4; goto hex; case 'H': n = 6; goto hex; case 'U': n = 8; hex: c = 0; while (n-- != 0) { d = getc(stream); if (('0' <= d && d <= '9')) { c = (c << 4) | (d - '0'); } else if (('A' <= d && d <= 'F')) { c = (c << 4) | (d - 'A' + 10); } else if (('a' <= d && d <= 'f')) { c = (c << 4) | (d - 'a' + 10); } else { ungetc(d, stream); break; } } break; default: /* leave c alone */ break; } } if (p == CHAR_TABLE_SIZE) { fprintf(stderr, "Character table overflow.\n"); fprintf(stderr, "Recompile with larger CHAR_TABLE_SIZE.\n"); exit(EXIT_FAILURE); } char_table[p++] = c; c = getc(stream); } /* Empty lines are common; be content with a single copy. '*' is used as a filler; be content with a single copy. */ if (p == ctp) return char_table+1; if (p == ctp+1 && char_table[p-1] == '*') return char_table; { unsigned char *r = &char_table[ctp]; for (n = 0; n < nuls; n++) { if (p == CHAR_TABLE_SIZE) { fprintf(stderr, "Character table overflow.\n"); fprintf(stderr, "Recompile with larger CHAR_TABLE_SIZE.\n"); exit(EXIT_FAILURE); } char_table[p++] = '\0'; } ctp = p; return r; } } static unsigned char *name; /* We allow a maximum of 20 public identifiers. This is entirely arbitrary. */ #define MAX_PUBIDS 20 static unsigned char *pubs[MAX_PUBIDS + 1]; static int npubs; static unsigned char *tags0[64]; static unsigned char **tags[256] = {tags0}; static int ncommontags = 5; static int ntagpages = 0; static unsigned char *atts0[256]; static unsigned char **atts[256] = {atts0}; static int ncommonatts = 5; static int ncommonvals = 5; static int nattpages = 0; static int nvalpages = 0; static void print(FILE *stream) { int i, j; unsigned char c; fprintf(stream, "static unsigned char c[%d] = {\n", ctp); for (i = 0; i < ctp; i++) { if ((i&15) == 0) fprintf(stream, " "); c = char_table[i]; if (c >= ' ' && c < 127 && c != '\\' && c != '\'') { fprintf(stream, "'%c',", c); } else { fprintf(stream, "%3u,", c); } if ((i&15) == 15) putc('\n', stream); } if ((i&15) != 0) putc('\n', stream); fprintf(stream, "};\n\n"); fprintf(stream, "#define NULS (unsigned char *)0\n\n"); fprintf(stream, "static unsigned char *pubs[%d] = {\n", npubs+1); for (i = 0; i < npubs; i++) fprintf(stream, " c+%5d,\n", pubs[i]-char_table); fprintf(stream, " NULS\n};\n\n"); for (i = 0; i < ntagpages; i++) { fprintf(stream, "static unsigned char *tags%d[64] = {\n", i); for (j = 0; j < 64; j++) { if ((j&7) == 0) fprintf(stream, " "); if (tags[i][j] == 0) { fprintf(stream, "NULS, "); } else { fprintf(stream, "c+%5d,", tags[i][j]-char_table); } if ((j&7) == 7) putc('\n', stream); } fprintf(stream, "};\n"); } if (ntagpages != 0) putc('\n', stream); for (i = 0; i < nattpages; i++) { fprintf(stream, "static unsigned char *atts%d[256] = {\n", i); for (j = 0; j < 256; j++) { if ((j&7) == 0) fprintf(stream, " "); if (atts[i][j] == 0) { fprintf(stream, "NULS, "); } else { fprintf(stream, "c+%5d,", atts[i][j]-char_table); } if ((j&7) == 7) putc('\n', stream); } fprintf(stream, "};\n\n"); } if (nattpages != 0) putc('\n', stream); fprintf(stream, "#include \"wbxml.h\"\n\n"); fprintf(stream, "struct WBXML_dictionary %s = {\n", name); fprintf(stream, " /*pubs = */pubs,\n"); fprintf(stream, " /*tags = */{\n"); for (i = 0; i < ntagpages; i++) fprintf(stream, " tags%d,\n", i); if (ntagpages < 256) fprintf(stream, " (unsigned char **)0\n"); fprintf(stream, " },\n"); fprintf(stream, " /*atts = */{\n"); for (i = 0; i < nattpages; i++) fprintf(stream, " atts%d,\n", i); if (nattpages < 256) fprintf(stream, " (unsigned char **)0\n"); fprintf(stream, " }\n"); fprintf(stream, "};\n\n"); } static void error(char const *msg) { fprintf(stderr, "loaddict: %s\n", msg); exit(EXIT_FAILURE); } int main(void) { int i; unsigned char *s; unsigned char *e; name = get_string(stdin, 1); if (*name == '\0') error("Empty name"); for (i = 0; *(s = get_string(stdin, 1)) != '\0'; i++) { if (i == MAX_PUBIDS) error("Too many public identifiers"); pubs[i] = s; } pubs[npubs = i] = 0; for (ncommontags = 5; *(s = get_string(stdin, 1)) != '\0'; ncommontags++) { if (ncommontags == 64) error("Too many common tags"); tags0[ncommontags] = s; } while (*(s = get_string(stdin, 1)) != '\0') { unsigned char **tagsi; tagsi = tags[ntagpages]; if (tagsi == 0) { tagsi = malloc(sizeof tags0); if (tagsi == 0) error("Ran out of memory"); tags[ntagpages] = tagsi; for (i = 5; i < ncommontags; i++) tagsi[i] = tags0[i]; } for (i = ncommontags; *s != '\0'; i++) { if (i == 64) error("Too many tags in some code page"); tagsi[i] = s; s = get_string(stdin, 1); } ntagpages++; } if (ntagpages == 0 && ncommontags > 5) ntagpages = 1; for (ncommonatts = 5; *(s = get_string(stdin, 2)) != '\0'; ncommonatts++) { if (ncommonatts == 128) error("Too many common attributess"); e = (unsigned char *)strchr((char const *)s, '='); if (e != 0) { *e = '\0'; ctp--; /* discard extra NUL */ } atts0[ncommonatts] = s; } while (*(s = get_string(stdin, 2)) != '\0') { unsigned char **attsi; attsi = atts[nattpages]; if (attsi == 0) { attsi = malloc(sizeof atts0); if (attsi == 0) error("Ran out of memory"); atts[nattpages] = attsi; for (i = 5; i < ncommonatts; i++) attsi[i] = atts0[i]; } for (i = ncommonatts; *s != '\0'; i++) { if (i == 128) error("Too many atts in some code page"); e = (unsigned char *)strchr((char const *)s, '='); if (e != 0) { *e = '\0'; ctp--; /* discard extra NUL */ } attsi[i] = s; s = get_string(stdin, 2); } nattpages++; } if (nattpages == 0 && ncommonatts > 5) nattpages = 1; for (ncommonvals = 5; *(s = get_string(stdin, 1)) != '\0'; ncommonvals++) { if (ncommonvals == 128) error("Too many common values"); atts0[128+ncommonvals] = s; } while (*(s = get_string(stdin, 1)) != '\0') { unsigned char **attsi; attsi = atts[nvalpages]; if (attsi != 0) { attsi = malloc(sizeof atts0); if (attsi == 0) error("Ran out of memory"); atts[nvalpages] = attsi; for (i = 5; i < ncommonvals; i++) attsi[128+i] = atts0[128+i]; } for (i = ncommonvals; *s != '\0'; i++) { if (i == 128) error("Too many vals in some code page"); attsi[128+i] = s; s = get_string(stdin, 1); } nvalpages++; } if (nvalpages == 0 && ncommonvals > 5) nvalpages = 1; for (; nattpages < nvalpages; nattpages++) { for (i = 5; i < ncommonatts; i++) atts[nattpages][i] = atts0[i]; } for (; nvalpages < nattpages; nvalpages++) { for (i = 5; i < ncommonvals; i++) atts[nvalpages][128+i] = atts0[128+i]; } #if 0 printf("Name = '%s'\n", name); for (i = 0; i < npubs; i++) printf("PubId[%d] = '%s'\n", i, pubs[i]); for (i = 0; i < ntagpages; i++) { int j; for (j = 5; j < 64 != 0; j++) if (tags[i][j] != 0) printf("Tag[%3d][%2x=%2d] = '%s'\n", i, j, j, tags[i][j]); } #else print(stdout); #endif return 0; }