diff options
author | H. Peter Anvin <hpa@smyrno.hos.anvin.org> | 2005-12-04 16:25:29 -0800 |
---|---|---|
committer | H. Peter Anvin <hpa@smyrno.hos.anvin.org> | 2005-12-04 16:25:29 -0800 |
commit | ee098bb180400f57a77a5598d964616f612a752c (patch) | |
tree | 492b884afdeec1ad0395863b5d25b3042d845f01 | |
parent | 3dc28c59aae75103be42f015f171ef8243e6dfde (diff) | |
download | libucd-ee098bb180400f57a77a5598d964616f612a752c.tar.gz |
Produce table for the ucstoname hash
-rw-r--r-- | Makefile | 53 | ||||
-rw-r--r-- | bin2c.pl | 2 | ||||
-rwxr-xr-x | convert_ucd.pl | 2 | ||||
-rw-r--r-- | libucd_int.h | 8 | ||||
-rw-r--r-- | mk_nametoucs_tab.c | 128 |
5 files changed, 177 insertions, 16 deletions
@@ -1,14 +1,14 @@ CC = cc -CFLAGS = -g -O -I. +CFLAGS = -g -O -I. -W -Wall PICFLAGS = -fPIC LDFLAGS = HOST_CC = cc -HOST_CFLAGS = -g -O +HOST_CFLAGS = -g -O -I. -W -Wall HOST_LDFLAGS = HOST_LIBS = -PERL = perl +PERL = time perl # # These are the files produced by convert_ucd.pl @@ -18,29 +18,40 @@ CVT_FILES = gen/jamo.c gen/nameslist.tab gen/nametoucs.keys gen/nametoucs.tab \ # ----------------------------------------------------------------------- -.SUFFIXES: .c .o .lo .s .ls .i .li .cc .h +.SUFFIXES: .c .o .lo .s .ls .i .ho .hs .hi .cc .h .c.o: $(CC) $(CFLAGS) -c -o $@ $< +.c.lo: + $(CC) $(CFLAGS) $(PICFLAGS) -c -o $@ $< .c.s: $(CC) $(CFLAGS) -S -o $@ $< +.c.ls: + $(CC) $(CFLAGS) $(PICFLAGS) -S -o $@ $< .c.i: $(CC) $(CFLAGS) -E -o $@ $< -.c.lo: +.c.ho: $(HOST_CC) $(HOST_CFLAGS) -c -o $@ $< -.c.ls: +.c.hs: $(HOST_CC) $(HOST_CFLAGS) -S -o $@ $< -.c.li: +.c.hi: $(HOST_CC) $(HOST_CFLAGS) -E -o $@ $< # ----------------------------------------------------------------------- -all : perfect/perfect gen/nametoucs_hash.o gen/ucstoname_hash.o \ - proparray.o +LIBSRC = proparray.c gen/nametoucs_hash.c gen/ucstoname_hash.c \ + gen/jamo.c gen/nameslist.c gen/nameslist_dict.c \ + gen/ucstoname_tab.c + +LIBOBJ = $(patsubst %.c,%.o,$(LIBSRC)) +SO_OBJ = $(patsubst %.c,%.lo,$(LIBSRC)) + +# ----------------------------------------------------------------------- +all : $(LIBOBJ) $(SO_OBJ) clean: rm -rf gen - rm -f *.o *.a *.so *.so.* + rm -f *.o *.i *.*.a *.so *.so.* $(MAKE) -C perfect clean # ----------------------------------------------------------------------- @@ -56,14 +67,14 @@ perfect/perfect: $(wildcard perfect/*.c perfect/*.h) $(MAKE) -C perfect gen/nametoucs_hash.c: gen/nametoucs.keys perfect/perfect - perfect/perfect -im libucd_nametoucs_hash gen/nametoucs_hash.c \ + perfect/perfect -im _libucd_nametoucs_hash gen/nametoucs_hash.c \ gen/nametoucs_hash.h < gen/nametoucs.keys gen/nametoucs_hash.h: gen/nametoucs_hash.c : Generated by side effect gen/ucstoname_hash.c: gen/ucstoname.keys perfect/perfect - perfect/perfect -hm libucd_ucstoname_hash gen/ucstoname_hash.c \ + perfect/perfect -hm _libucd_ucstoname_hash gen/ucstoname_hash.c \ gen/ucstoname_hash.h < gen/ucstoname.keys gen/ucstoname_hash.h: gen/ucstoname_hash.c @@ -76,8 +87,24 @@ gen/nameslist_tab.c gen/nameslist.offset: gen/nameslist.compr : Generated by side effect gen/nameslist.c: gen/nameslist.compr bin2c.pl - $(PERL) bin2c.pl < $< > $@ || rm -f $@ + $(PERL) bin2c.pl _libucd_names_list < $< > $@ || rm -f $@ + +gen/mk_nametoucs_tab: mk_nametoucs_tab.ho gen/ucstoname_hash.ho + $(HOST_CC) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LIBS) + +gen/ucstoname_tab.c: gen/mk_nametoucs_tab \ + gen/proparrayindex gen/nameslist.offset + gen/mk_nametoucs_tab # ----------------------------------------------------------------------- proparray.o: proparray.c ucd.h libucd_int.h gen/proparray.c +proparray.lo: proparray.c ucd.h libucd_int.h gen/proparray.c + +mk_nametoucs_tab.ho: gen/ucstoname_hash.h + +gen/ucstoname_tab.o: gen/ucstoname_tab.c libucd_int.h +gen/ucstoname_tab.lo: gen/ucstoname_tab.c libucd_int.h + +gen/nameslist_dict.o: gen/nameslist_dict.c +gen/nameslist_dict.lo: gen/nameslist_dict.c @@ -26,7 +26,7 @@ if ( $#ARGV != 0 ) { ($table_name) = @ARGV; -printf "unsigned char %s[] = {\n", $table_name; +printf "const unsigned char %s[] = {\n", $table_name; $pos = 0; $linelen = 8; diff --git a/convert_ucd.pl b/convert_ucd.pl index 34470d4..9d85cc2 100755 --- a/convert_ucd.pl +++ b/convert_ucd.pl @@ -502,9 +502,9 @@ sub make_properties_array() print $fh $mine; $cnt++; $prev = $mine; + printf $fhi "0x%05x $cnt\n", $c, $cnt; } $prop_array_position{$c} = $cnt; - printf $fhi "0x%05x $cnt\n", $c, $cnt; } print $fh "\t/* Total: $cnt ranges */\n"; diff --git a/libucd_int.h b/libucd_int.h index c23b8ed..3da1857 100644 --- a/libucd_int.h +++ b/libucd_int.h @@ -17,7 +17,7 @@ extern const char _libucd_hangul_jamo_t[][4]; /* This structure is exactly 32 bytes long, nice and alignable. */ struct _libucd_property_array { - int32_t ucd; /* Wasteful but fast (used in search) */ + int32_t ucs; /* Wasteful but fast (used in search) */ uint8_t general_category; uint8_t script; uint8_t numeric_value_num; @@ -41,4 +41,10 @@ struct _libucd_property_array { unsigned /* unused */ :1; }; +struct _libucd_ucstoname_tab { + int24 ucs; + uint24 names_offset; + uint16_t proparray_offset; +}; + #endif diff --git a/mk_nametoucs_tab.c b/mk_nametoucs_tab.c new file mode 100644 index 0000000..1205d49 --- /dev/null +++ b/mk_nametoucs_tab.c @@ -0,0 +1,128 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include "gen/ucstoname_hash.h" + +#define UCS_CNT 0x110000 + +const char *program; + +void die(const char *msg) +{ + fprintf(stderr, "%s: %s\n", program, msg); + exit(1); +} + +static const char *int24str(int32_t n) +{ + static char str[17]; + + sprintf(str, "{0x%02x,0x%02x,0x%02x}", + (n & 0xff), + ((n >> 8) & 0xff), + ((n >> 16) & 0xff)); + return str; +} + +static int proparrayindex[UCS_CNT]; +static void read_proparrayindex(void) +{ + FILE *f = fopen("gen/proparrayindex", "rt"); + if ( !f ) + die("could not open gen/proparrayindex"); + int last = 0, prev_ix = -1; + int curr, i, ix; + + while ( fscanf(f, "%x %d\n", &curr, &ix) == 2 ) { + for ( i = last ; i < curr ; i++ ) + proparrayindex[i] = prev_ix; + last = curr; + prev_ix = ix; + } + + for ( i = last ; i < UCS_CNT ; i++ ) + proparrayindex[i] = prev_ix; + + fclose(f); +} + + +static int nameslistoffset[UCS_CNT]; +static void read_nameslistoffset(void) +{ + FILE *f = fopen("gen/nameslist.offset", "rt"); + if ( !f ) + die("could not open gen/nameslist.offset"); + int curr, offset; + int i; + + for ( i = 0 ; i < UCS_CNT ; i++ ) + nameslistoffset[i] = -1; + + while ( fscanf(f, "%x %d\n", &curr, &offset) == 2 ) + nameslistoffset[curr] = offset; + + fclose(f); +} + + +int32_t hash_to_ucs[PHASHNKEYS]; +static void compute_hash_to_ucs(void) +{ + uint32_t hash; + int i; + + for ( i = 0 ; i < PHASHNKEYS ; i++ ) + hash_to_ucs[i] = -1; + + for ( i = 0 ; i < UCS_CNT ; i++ ) { + if ( nameslistoffset[i] != -1 ) { + hash = _libucd_ucstoname_hash(i); + + if ( hash >= PHASHNKEYS ) + die("hash not minimal"); + + if ( hash_to_ucs[hash] != -1 ) + die("hash collision"); + + hash_to_ucs[hash] = i; + } + } +} + + +static void make_ucstoname_tab(void) +{ + FILE *f = fopen("gen/ucstoname_tab.c", "wt"); + if ( !f ) + die("could not create gen/ucstoname_tab.c"); + int i; + int32_t ucs; + + fprintf(f, + "#include \"libucd_int.h\"\n" + "const struct _libucd_ucstoname_tab _libucd_ucstoname_tab[] =\n" + "{\n"); + + for ( i = 0 ; i < PHASHNKEYS ; i++ ) { + ucs = hash_to_ucs[i]; + fprintf(f, "\t{ %s, ", int24str(ucs)); + fprintf(f, "%s, ", int24str(nameslistoffset[ucs])); + fprintf(f, "%d },\n", proparrayindex[ucs]); + } + fprintf(f, "};\n"); + fclose(f); +} + + +int main(void) +{ + read_proparrayindex(); + read_nameslistoffset(); + compute_hash_to_ucs(); + make_ucstoname_tab(); + + return 0; +} + |