/* Copyright (c) 2004-2010, Dirk Krause All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above opyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Dirk Krause nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** @file dkle.c LaTeX encoder module. */ #include "dk.h" #include "dkmem.h" #include "dksf.h" #include "dkerror.h" #include "dkstr.h" #include "dkstream.h" #include "dkl2l.h" /** Inside the dklatsup module. */ #define DK_LATSUP_C #include "dkle.h" #include "dkenc.h" $(trace-include) /** Abbreviation for use with sizeof operator. */ typedef void *VPTR; /** File open mode. */ static char str_r[] = { "r" }; /** LaTeX command to open math mode. */ static char str_mm_open[] = { "\\(" }; /** LaTeX command to close math mode. */ static char str_mm_close[] = { "\\)" }; /** Flag: Have character in cache. */ #define FLAG_HAVE_CACHE 1 /** Release translation tables. @param d Array of translation entries. */ static void release_v4 DK_P1(dk_le_ct_t *,d) { dk_le_ct_t *ct; size_t i; char *x; $? "+ release_v4 %s", TR_PTR(d) ct = d; for(i = 0; i < 256; i++) { if(ct->t) { $? ". %d text=%s", (int)i, ct->t x = ct->t; dk_delete(x); } if(ct->m) { $? ". %d math=%s", (int)i, ct->m x = ct->m; dk_delete(x); } if(ct->a) { $? ". %d all=%s", (int)i, ct->a x = ct->a; dk_delete(x); } ct->t = ct->m = ct->a = NULL; ct++; } dk_delete(d); $? "- release_v4" } /** Release translation tables, third directory level. @param d Pointer array. */ static void release_v3 DK_P1(VPTR *,d) { VPTR *vptr; size_t i; $? "+ release_v3 %s", TR_PTR(d) vptr = d; for(i = 0; i < 256; i++) { if(*vptr) { $? ". vptr %ul", (unsigned long)i release_v4((dk_le_ct_t *)(*vptr)); *vptr = NULL; } vptr++; } dk_delete(d); $? "- release_v3" } /** Release translation tables, second directory level. @param d Pointer array. */ static void release_v2 DK_P1(VPTR *,d) { VPTR *vptr; size_t i; $? "+ release_v2 %s", TR_PTR(d) vptr = d; for(i = 0; i < 256; i++) { if(*vptr) { $? ". vptr %ul", (unsigned long)i release_v3((VPTR *)(*vptr)); *vptr = NULL; } vptr++; } dk_delete(d); $? "- release_v2" } /** Release translation tables, first directory level. @param d Pointer array. */ static void release_v1 DK_P1(VPTR *,d) { VPTR *vptr; size_t i; $? "+ release_v1 %s", TR_PTR(d) vptr = d; for(i = 0; i < 256; i++) { if(*vptr) { $? ". vptr %ul", (unsigned long)i release_v2((VPTR *)(*vptr)); *vptr = NULL; } vptr++; } dk_delete(d); $? "- release_v1" } void dkle_close DK_P1(dk_le_t *,leptr) { char *x; $? "+ dkle_close %s", TR_PTR(leptr) if(leptr) { if(leptr->filename) { x = leptr->filename; dk_delete(x); } if(leptr->dirname) { x = leptr->dirname; dk_delete(x); } if(leptr->data) { release_v1((VPTR *)(leptr->data)); leptr->data = NULL; } leptr->filename = NULL; leptr->dirname = NULL; dk_delete(leptr); } $? "- dkle_close" } /** Initialize dk_le_t structure. @param leptr Struture to initialize. @param dirname Directory containing the encoding data files. @return 1 on success, 0 on error. */ static int dkle_init DK_P2(dk_le_t *,leptr, char *,dirname) { int back = 0; size_t sz; $? "+ dkle_init %s %s", TR_PTR(leptr), TR_STR(dirname) leptr->dirname = NULL; leptr->filename = NULL; sz = strlen(dirname); sz++; leptr->dirname = dk_new(char,sz); sz += 15; /* "/000000.dat" */ leptr->filename = dk_new(char,sz); if((leptr->dirname) && (leptr->filename)) { back = 1; strcpy(leptr->dirname, dirname); leptr->flags = 0; leptr->cache_element = 0UL; leptr->cache_ptr = NULL; leptr->data = NULL; } $? "- dkle_init %d", back return back; } dk_le_t * dkle_open DK_P1(char *, dirname) { dk_le_t *back = NULL; $? "+ dkle_open %s", TR_STR(dirname) back = dk_new(dk_le_t,1); if(back) { if(!dkle_init(back, dirname)) { dkle_close(back); back = NULL; } } $? "- dkle_open %s", TR_PTR(back) return back; } /** Allocate new pointer array for code block. @return Pointer to memory on success, NULL on error. */ static VPTR * new_vptr_array DK_P0() { VPTR *back = NULL, *vptr; size_t i; $? "+ new_vptr_array" back = dk_new(VPTR,256); if(back) { vptr = back; for(i = 0; i < 256; i++) { *(vptr++) = NULL; } } $? "- new_vptr_array %s", TR_PTR(back) return back; } /** Remove trailing newline from a string if present. @param l String to modify. */ static void remove_nl DK_P1(char *,l) { char *ptr; $? "+ remove_nl %s", TR_PTR(l) ptr = l; while(*ptr) { switch(*ptr) { case '\r': case '\n': { *ptr = '\0'; } break; default: { ptr++; } break; } } $? "- remove_nl %s", TR_STR(l) } /** Entry mode names. */ static char *the_types[] = { "t$ext", "m$ath", "a$ll", "*", NULL }; /** Add one input line to the current table. @param le LaTeX encoder object. @param ct Current encoder table. @param il Input line. @return 1 on success, 0 on error. */ static int add_input_line DK_P3(dk_le_t *,le, dk_le_ct_t *,ct, char *,il) { int back = 0, t1, t2, reason; char chr, *p1, *p2, *p3, *newptr, *x; unsigned long ul; size_t s; $? "+ add_input_line %s %s %s", TR_PTR(le), TR_PTR(ct), TR_STR(il) reason = DK_ERR_SYNTAX; remove_nl(il); p1 = dkstr_start(il, NULL); if(p1) { $? ". contents in line" if(*p1 == '#') { back = 1; $? ". comment line" } else { $? ". non-comment line" p2 = dkstr_next(p1, NULL); if(p2) { $? ". second part found" p3 = dkstr_next(p2, NULL); if(p3) { $? ". third part found" t1 = dkstr_array_abbr(the_types, p1, '$', 1); if(t1 >= 0) { $? ". valid type selector" chr = 0; ul = 0UL; t2 = 0; switch(*p2) { case '0' : { $? ". decimal or hex number" if((p2[1] == 'x') || (p2[1] == 'X')) { t2 = 1; $? ". hex number" } } break; case '\'': { $? ". character" t2 = 2; } break; } switch(t2) { case 1: { $? ". scan hex number" if(sscanf(&(p2[2]), "%lx", &ul) == 1) { back = 1; } } break; case 2: { $? ". scan characte" p2++; if(*p2 == '\\') { p2++; switch(*p2) { case 'n': { chr = '\n'; } break; case 'r': { chr = '\r'; } break; case 't': { chr = '\t'; } break; case 'a': { chr = '\a'; } break; case 'b': { chr = '\b'; } break; default: { chr = *p2; } break; } } else { chr = *p2; } ul = (unsigned long)((unsigned char)chr); ul &= 255UL; back = 1; } break; default: { $? ". scan decimal number" if(sscanf(p2, "%lu", &ul) == 1) { back = 1; } } break; } if(back) { back = 0; s = ul; s &= 0x00FF; newptr = dkstr_dup(p3); if(newptr) { back = 1; switch(t1) { case 0: { $? ". replace text setting" if(ct[s].t) { x = ct[s].t; dk_delete(x); } ct[s].t = newptr; } break; case 1: { $? ". replace math setting" if(ct[s].m) { x = ct[s].m; dk_delete(x); } ct[s].m = newptr; } break; default: { $? ". replace general setting" if(ct[s].a) { x = ct[s].a; dk_delete(x); } ct[s].a = newptr; } break; } } else { reason = DK_ERR_NOMEM; } } } else { $? "! invalid type selector" } } } } } else { /* empty line */ back = 1; $? ". empty line" } if(!back) { le->error_code = reason; } $? "- add_input_line %d", back return back; } /** Create new encoding table, allocate memory. @param le LaTeX encoder object. @param cd Number of the code block. @return Pointer to new table on success, NULL on error. */ static dk_le_ct_t * new_ct DK_P2(dk_le_t *,le, dk_udword,cd) { dk_le_ct_t *back = NULL, *ct; size_t i; dk_stream_t *istrm = NULL; int r = 0, cc = 1; char inputline[128]; $? "+ new_ct %s %lu", TR_PTR(le), (unsigned long)cd sprintf( le->filename, "%s/%02lx/%02lx/%02lx.dat", le->dirname, ((cd >> 16) & 0x000000FFUL), ((cd >> 8) & 0x000000FFUL), (cd & 0x000000FFUL) ); dksf_correct_fnsep(le->filename); $? ". fn=%s", le->filename istrm = dkstream_openfile(le->filename, str_r, 0, &r); #if DK_HAVE_ZLIB_H if(!istrm) { sprintf(le->filename, "%s/%06lx.gz", le->dirname, cd); dksf_correct_fnsep(le->filename); $? ". fn=%s", le->filename istrm = dkstream_opengz(le->filename, str_r, 0, &r); } #endif #if DK_HAVE_BZLIB_H if(!istrm) { sprintf(le->filename, "%s/%06lx.bz2", le->dirname, cd); dksf_correct_fnsep(le->filename); $? ". fn=%s", le->filename istrm = dkstream_openbz2(le->filename, str_r, 0, &r); } #endif if(istrm) { le->error_line = 0; back = dk_new(dk_le_ct_t,256); if(back) { ct = back; for(i = 0; i < 256; i++) { ct->m = NULL; ct->t = NULL; ct->a = NULL; ct++; } cc = 1; while(cc) { if(dkstream_gets(istrm, inputline, sizeof(inputline))) { le->error_line += 1UL; if(!add_input_line(le, back, inputline)) { cc = 0; dk_delete(back); back = 0; } } else { cc = 0; } } } dkstream_close(istrm); istrm = NULL; } $? "- new_ct %s", TR_PTR(back) return back; } /** Set cache for last used encoding table. @param le LaTeX encoder object. @param cd Current data table number. @return 1 on success, 0 on error. */ static int set_cache_to DK_P2(dk_le_t *,le, dk_udword,cd) { int back = 0; VPTR *v1, *v2, *v3; $? "+ set_cache_to %s %lu", TR_PTR(le), (unsigned long)cd if(le) { if(((le->flags) & FLAG_HAVE_CACHE) && (le->cache_element == cd) && (le->cache_ptr)) { back = 1; } else { unsigned long i1, i2, i3; size_t s1, s2, s3; i1 = (cd >> 16) & 0x000000FFUL; s1 = ((size_t)i1) & 255; i2 = (cd >> 8) & 0x000000FFUL; s2 = ((size_t)i2) & 255; i3 = cd & 0x000000FFUL; s3 = ((size_t)i3) & 255; if(!(le->data)) { le->data = (void *)new_vptr_array(); if(!(le->data)) { le->error_code = DK_ERR_NOMEM; } } if(le->data) { v1 = (VPTR *)(le->data); if(!(v1[s1])) { v1[s1] = (void *)new_vptr_array(); if(!(v1[s1])) { le->error_code = DK_ERR_NOMEM; } } if(v1[s1]) { v2 = (VPTR *)(v1[s1]); if(!(v2[s2])) { v2[s2] = (void *)new_vptr_array(); if(!(v2[s2])) { le->error_code = DK_ERR_NOMEM; } } if(v2[s2]) { v3 = (VPTR *)(v2[s2]); if(!(v3[s3])) { v3[s3] = new_ct(le, cd); } if(v3[s3]) { le->cache_element = cd; le->cache_ptr = (dk_le_ct_t *)(v3[s3]); le->flags |= FLAG_HAVE_CACHE; back = 1; } } } } } } $? "- set_cache_to %d", back return back; } int dkle_load DK_P2(dk_le_t *,le, dk_udword,uc32) { int back = 0; $? "+ dkle_load %s %lu", TR_PTR(le), (unsigned long)uc32 if(le) { if(set_cache_to(le, ((uc32 >> 8) & 0x00FFFFFFUL))) { back = 1; } } $? "- dkle_load %d", back return back; } char * dkle_get_encoding DK_P3(dk_le_t *,le, dk_udword,uc32, int,mm) { char *back = NULL; dk_le_ct_t *ct; size_t sz; $? "+ dkle_get_encoding %s %lu %d", TR_PTR(le), (unsigned long)uc32, mm if(le) { if(set_cache_to(le, ((uc32 >> 8) & 0x00FFFFFFUL))) { sz = (size_t)(uc32 & 0x000000FFUL); sz &= 255; ct = le->cache_ptr; if(mm) { back = ct[sz].m; } else { back = ct[sz].t; } if(!back) { back = ct[sz].a; } } } $? "- dkle_get_encoding %s", TR_STR(back) return back; } int dkle_get_error_code DK_P2(dk_le_t *,l,int,res) { int back = 0; if(l) { back = l->error_code; if(res) { l->error_code = 0; } } return back; } char * dkle_get_filename DK_P1(dk_le_t *,l) { char *back = NULL; if(l) { back = l->filename; } return back; } unsigned long dkle_get_error_lineno DK_P1(dk_le_t *,l) { unsigned long back = 0UL; if(l) { back = l->error_line; } return back; } /* To allocate memory add 1 to the result of dkle_length_plain for the finalizing 0x00 byte. */ size_t dkle_length_plain_to_latex DK_P2(dk_le_t *,le, char *,s) { size_t back = 0; int error_found = 0; char *ptr; char *res; int in_math_mode = 0; size_t sl_mm_open = 2; size_t sl_mm_close = 2; $? "+ dkle_length_plain_to_latex \"%s\"", TR_STR(s) if((le) && (s)) { sl_mm_open = strlen(str_mm_open); sl_mm_close = strlen(str_mm_close); back = 0; ptr = s; while(*ptr) { res = dk_l2l_encoding(*ptr); if(res) { back += strlen(res); if(in_math_mode) { back += sl_mm_close; } in_math_mode = 0; } else { char c; unsigned char uc; unsigned long ul; c = *ptr; uc = (unsigned char)c; ul = (unsigned long)uc; if(dkle_load(le, ul)) { res = dkle_get_encoding(le, ul, 0); if(res) { back += strlen(res); if(in_math_mode) { back += sl_mm_close; } in_math_mode = 0; } else { res = dkle_get_encoding(le, ul, 1); if(res) { back += strlen(res); if(!in_math_mode) { back += sl_mm_open; } in_math_mode = 1; } else { $? "! encoding not found" error_found = 1; } } } else { $? "! string table not loaded" error_found = 1; } } ptr++; } if(in_math_mode) { back += sl_mm_close; } if(error_found) { back = 0; $? "! previous error found" } } $? "- dkle_length_plain_to_latex %lu", back, (unsigned long)back return back; } size_t dkle_length_utf8_to_latex DK_P2(dk_le_t *,le, char *,s) { size_t back = 0; int cc; size_t avail, step; dk_udword ucb; char *ptr, *found; int in_math_mode, error_found; size_t sl_mm_open, sl_mm_close; $? "+ dkle_length_utf8_to_latex \"%s\"", TR_STR(s) if((le) && (s)) { error_found = 0; sl_mm_open = strlen(str_mm_open); sl_mm_close = strlen(str_mm_close); ptr = s; cc = 1; avail = strlen(s); in_math_mode = 0; while(cc) { cc = 0; if(avail > 0) { step = 0; ucb = 0UL; cc = dkenc_utf82uc(&ucb, (unsigned char *)ptr, avail, &step); if(cc) { found = NULL; if(dkle_load(le, ucb)) { found = dkle_get_encoding(le, ucb, 0); if(found) { back += strlen(found); if(in_math_mode) { back += sl_mm_close; } in_math_mode = 0; } else { found = dkle_get_encoding(le, ucb, 1); if(found) { back += strlen(found); if(!in_math_mode) { back += sl_mm_open; } in_math_mode = 1; } else { if(ucb < 256UL) { found = dk_l2l_encoding((char)ucb); if(in_math_mode) { back += sl_mm_close; } in_math_mode = 0; if(found) { back += strlen(found); } else { back++; } } } } } else { error_found = 1; cc = 0; /* ##### ERROR: Failed to load table */ } if(step < avail) { avail = avail - step; } else { avail = 0; } while(step--) ptr++; } else { /* ##### ERROR: Conversion failed */ error_found = 1; } } if(error_found) { cc = 0; } } if(in_math_mode) { back += sl_mm_close; } in_math_mode = 0; if(error_found) { back = 0; } } $? "- dkle_length_utf8_to_latex %lu", (unsigned long)back return back; } int dkle_put_utf8_to_latex DK_P4(dk_le_t *,le, char *,d, size_t,sz, char *,s) { int back = 0; int cc; size_t avail, step; dk_udword ucb; char *ptr, *found; int in_math_mode, error_found; size_t sl_mm_open, sl_mm_close, sl_d; if((le) && (s)) { back = 1; sl_mm_open = strlen(str_mm_open); sl_mm_close = strlen(str_mm_close); ptr = s; cc = 1; avail = strlen(s); in_math_mode = 0; error_found = 0; *d = '\0'; while(cc) { sl_d = strlen(d); cc = 0; if(avail > 0) { step = 0; ucb = 0UL; cc = dkenc_utf82uc(&ucb, (unsigned char *)ptr, avail, &step); if(cc) { found = NULL; if(dkle_load(le, ucb)) { found = dkle_get_encoding(le, ucb, 0); if(found) { if(in_math_mode) { if(sl_d + sl_mm_close + strlen(found) < sz) { strcat(d, str_mm_close); strcat(d, found); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } else { if(sl_d + strlen(found) < sz) { strcat(d, found); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 0; } else { found = dkle_get_encoding(le, ucb, 1); if(found) { if(!in_math_mode) { if(sl_d + sl_mm_open + strlen(found) < sz) { strcat(d, str_mm_open); strcat(d, found); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } else { if(sl_d + strlen(found) < sz) { strcat(d, found); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 1; } else { if(ucb < 256UL) { found = dk_l2l_encoding((char)ucb); if(in_math_mode) { if(sl_d + sl_mm_close < sz) { strcat(d, str_mm_close); sl_d = strlen(d); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 0; if(found) { if(sl_d + strlen(found) < sz) { strcat(d, found); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } else { if(sl_d + 1 < sz) { d[sl_d] = *ptr; d[sl_d+1] = '\0'; } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } } } } } else { error_found = 1; cc = 0; /* ##### ERROR: Failed to load table */ } if(step < avail) { avail = avail - step; } else { avail = 0; } while(step--) ptr++; } else { /* ##### ERROR: Conversion failed */ error_found = 1; } } if(error_found) { cc = 0; } } if(in_math_mode) { if(strlen(d) + sl_mm_close < sz) { strcat(d, str_mm_close); } else { error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 0; if(error_found) { back = 0; } } return back; } int dkle_put_plain_to_latex DK_P4(dk_le_t *,le, char *,d, size_t,sz, char *,s) { int back = 0; int error_found = 0; int in_math_mode = 0; char *ptr; char *res; size_t sl_mm_open = 2; size_t sl_mm_close = 2; size_t sl_d = 0; $? "+ dkle_put_plain_to_latex \"%s\"", TR_STR(s) if((le) && (d) && (s) && (sz > 0)) { back = 1; sl_mm_open = strlen(str_mm_open); sl_mm_close = strlen(str_mm_close); *d = '\0'; ptr = s; while((*ptr) && (!error_found)) { sl_d = strlen(d); res = dk_l2l_encoding(*ptr); if(res) { if(in_math_mode) { if(sl_d + sl_mm_close + strlen(res) < sz) { strcat(d, str_mm_close); strcat(d, res); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } else { if(sl_d + strlen(res) < sz) { strcat(d, res); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 0; } else { char c; unsigned char uc; unsigned long ul; c = *ptr; uc = (unsigned char)c; ul = (unsigned long)uc; if(dkle_load(le, ul)) { res = dkle_get_encoding(le, ul, 0); if(res) { if(in_math_mode) { if(sl_d + sl_mm_close + strlen(res) < sz) { strcat(d, str_mm_close); strcat(d, res); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } else { if(sl_d + strlen(res) < sz) { strcat(d, res); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 0; } else { res = dkle_get_encoding(le, ul, 1); if(res) { if(in_math_mode) { if(sl_d + strlen(res) < sz) { strcat(d, res); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } else { if(sl_d + sl_mm_open + strlen(res) < sz) { strcat(d, str_mm_open); strcat(d, res); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } in_math_mode = 1; } else { $? "! no encoding found" error_found = 1; } } } else { $? "! failed to load encoding file" error_found = 1; } } ptr++; } if(in_math_mode) { if(strlen(d) + sl_mm_close < sz) { strcat(d, str_mm_close); } else { $? "! buffer too small" error_found = 1; le->error_code = DK_ERR_STRING_TOO_LONG; } } } if(error_found) { back = 0; $? "! previous error" } $? "- dkle_put_plain_to_latex %d", back return back; }