aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@smyrno.hos.anvin.org>2005-12-04 16:25:29 -0800
committerH. Peter Anvin <hpa@smyrno.hos.anvin.org>2005-12-04 16:25:29 -0800
commitee098bb180400f57a77a5598d964616f612a752c (patch)
tree492b884afdeec1ad0395863b5d25b3042d845f01
parent3dc28c59aae75103be42f015f171ef8243e6dfde (diff)
downloadlibucd-ee098bb180400f57a77a5598d964616f612a752c.tar.gz
Produce table for the ucstoname hash
-rw-r--r--Makefile53
-rw-r--r--bin2c.pl2
-rwxr-xr-xconvert_ucd.pl2
-rw-r--r--libucd_int.h8
-rw-r--r--mk_nametoucs_tab.c128
5 files changed, 177 insertions, 16 deletions
diff --git a/Makefile b/Makefile
index 47bdf29..625545c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,14 @@
CC = cc
-CFLAGS = -g -O -I.
+CFLAGS = -g -O -I. -W -Wall
PICFLAGS = -fPIC
LDFLAGS =
HOST_CC = cc
-HOST_CFLAGS = -g -O
+HOST_CFLAGS = -g -O -I. -W -Wall
HOST_LDFLAGS =
HOST_LIBS =
-PERL = perl
+PERL = time perl
#
# These are the files produced by convert_ucd.pl
@@ -18,29 +18,40 @@ CVT_FILES = gen/jamo.c gen/nameslist.tab gen/nametoucs.keys gen/nametoucs.tab \
# -----------------------------------------------------------------------
-.SUFFIXES: .c .o .lo .s .ls .i .li .cc .h
+.SUFFIXES: .c .o .lo .s .ls .i .ho .hs .hi .cc .h
.c.o:
$(CC) $(CFLAGS) -c -o $@ $<
+.c.lo:
+ $(CC) $(CFLAGS) $(PICFLAGS) -c -o $@ $<
.c.s:
$(CC) $(CFLAGS) -S -o $@ $<
+.c.ls:
+ $(CC) $(CFLAGS) $(PICFLAGS) -S -o $@ $<
.c.i:
$(CC) $(CFLAGS) -E -o $@ $<
-.c.lo:
+.c.ho:
$(HOST_CC) $(HOST_CFLAGS) -c -o $@ $<
-.c.ls:
+.c.hs:
$(HOST_CC) $(HOST_CFLAGS) -S -o $@ $<
-.c.li:
+.c.hi:
$(HOST_CC) $(HOST_CFLAGS) -E -o $@ $<
# -----------------------------------------------------------------------
-all : perfect/perfect gen/nametoucs_hash.o gen/ucstoname_hash.o \
- proparray.o
+LIBSRC = proparray.c gen/nametoucs_hash.c gen/ucstoname_hash.c \
+ gen/jamo.c gen/nameslist.c gen/nameslist_dict.c \
+ gen/ucstoname_tab.c
+
+LIBOBJ = $(patsubst %.c,%.o,$(LIBSRC))
+SO_OBJ = $(patsubst %.c,%.lo,$(LIBSRC))
+
+# -----------------------------------------------------------------------
+all : $(LIBOBJ) $(SO_OBJ)
clean:
rm -rf gen
- rm -f *.o *.a *.so *.so.*
+ rm -f *.o *.i *.*.a *.so *.so.*
$(MAKE) -C perfect clean
# -----------------------------------------------------------------------
@@ -56,14 +67,14 @@ perfect/perfect: $(wildcard perfect/*.c perfect/*.h)
$(MAKE) -C perfect
gen/nametoucs_hash.c: gen/nametoucs.keys perfect/perfect
- perfect/perfect -im libucd_nametoucs_hash gen/nametoucs_hash.c \
+ perfect/perfect -im _libucd_nametoucs_hash gen/nametoucs_hash.c \
gen/nametoucs_hash.h < gen/nametoucs.keys
gen/nametoucs_hash.h: gen/nametoucs_hash.c
: Generated by side effect
gen/ucstoname_hash.c: gen/ucstoname.keys perfect/perfect
- perfect/perfect -hm libucd_ucstoname_hash gen/ucstoname_hash.c \
+ perfect/perfect -hm _libucd_ucstoname_hash gen/ucstoname_hash.c \
gen/ucstoname_hash.h < gen/ucstoname.keys
gen/ucstoname_hash.h: gen/ucstoname_hash.c
@@ -76,8 +87,24 @@ gen/nameslist_tab.c gen/nameslist.offset: gen/nameslist.compr
: Generated by side effect
gen/nameslist.c: gen/nameslist.compr bin2c.pl
- $(PERL) bin2c.pl < $< > $@ || rm -f $@
+ $(PERL) bin2c.pl _libucd_names_list < $< > $@ || rm -f $@
+
+gen/mk_nametoucs_tab: mk_nametoucs_tab.ho gen/ucstoname_hash.ho
+ $(HOST_CC) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LIBS)
+
+gen/ucstoname_tab.c: gen/mk_nametoucs_tab \
+ gen/proparrayindex gen/nameslist.offset
+ gen/mk_nametoucs_tab
# -----------------------------------------------------------------------
proparray.o: proparray.c ucd.h libucd_int.h gen/proparray.c
+proparray.lo: proparray.c ucd.h libucd_int.h gen/proparray.c
+
+mk_nametoucs_tab.ho: gen/ucstoname_hash.h
+
+gen/ucstoname_tab.o: gen/ucstoname_tab.c libucd_int.h
+gen/ucstoname_tab.lo: gen/ucstoname_tab.c libucd_int.h
+
+gen/nameslist_dict.o: gen/nameslist_dict.c
+gen/nameslist_dict.lo: gen/nameslist_dict.c
diff --git a/bin2c.pl b/bin2c.pl
index 8c56ca6..93907b9 100644
--- a/bin2c.pl
+++ b/bin2c.pl
@@ -26,7 +26,7 @@ if ( $#ARGV != 0 ) {
($table_name) = @ARGV;
-printf "unsigned char %s[] = {\n", $table_name;
+printf "const unsigned char %s[] = {\n", $table_name;
$pos = 0;
$linelen = 8;
diff --git a/convert_ucd.pl b/convert_ucd.pl
index 34470d4..9d85cc2 100755
--- a/convert_ucd.pl
+++ b/convert_ucd.pl
@@ -502,9 +502,9 @@ sub make_properties_array()
print $fh $mine;
$cnt++;
$prev = $mine;
+ printf $fhi "0x%05x $cnt\n", $c, $cnt;
}
$prop_array_position{$c} = $cnt;
- printf $fhi "0x%05x $cnt\n", $c, $cnt;
}
print $fh "\t/* Total: $cnt ranges */\n";
diff --git a/libucd_int.h b/libucd_int.h
index c23b8ed..3da1857 100644
--- a/libucd_int.h
+++ b/libucd_int.h
@@ -17,7 +17,7 @@ extern const char _libucd_hangul_jamo_t[][4];
/* This structure is exactly 32 bytes long, nice and alignable. */
struct _libucd_property_array {
- int32_t ucd; /* Wasteful but fast (used in search) */
+ int32_t ucs; /* Wasteful but fast (used in search) */
uint8_t general_category;
uint8_t script;
uint8_t numeric_value_num;
@@ -41,4 +41,10 @@ struct _libucd_property_array {
unsigned /* unused */ :1;
};
+/*
+ * Element type of the _libucd_ucstoname_tab[] array that mk_nametoucs_tab
+ * writes to gen/ucstoname_tab.c, one element per ucstoname hash value.
+ * NOTE(review): int24/uint24 are not declared in this hunk; the generator
+ * initializes each of them with three bytes, least significant byte first
+ * -- confirm against their definition.
+ */
+struct _libucd_ucstoname_tab {
+ int24 ucs; /* code point that hashes to this slot */
+ uint24 names_offset; /* value read from gen/nameslist.offset for that code point */
+ uint16_t proparray_offset; /* value read from gen/proparrayindex for that code point */
+};
+
#endif
diff --git a/mk_nametoucs_tab.c b/mk_nametoucs_tab.c
new file mode 100644
index 0000000..1205d49
--- /dev/null
+++ b/mk_nametoucs_tab.c
@@ -0,0 +1,128 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include "gen/ucstoname_hash.h"
+
+#define UCS_CNT 0x110000
+
+const char *program;
+
+/*
+ * Print "<program>: <msg>" on stderr and terminate with exit status 1.
+ *
+ * `program` may still be a null pointer when this is called; passing a
+ * null pointer for %s is undefined behaviour, so fall back to a fixed
+ * name in that case.
+ */
+void die(const char *msg)
+{
+  fprintf(stderr, "%s: %s\n", program ? program : "mk_nametoucs_tab", msg);
+  exit(1);
+}
+
+/*
+ * Format the low 24 bits of n as a C initializer for three bytes, least
+ * significant byte first, e.g. 0x123456 -> "{0x56,0x34,0x12}".
+ *
+ * Returns a pointer to a static buffer which is overwritten by the next
+ * call, so the result must be consumed before calling again.
+ */
+static const char *int24str(int32_t n)
+{
+  /* "{0x..,0x..,0x..}" is 16 characters plus the terminating NUL */
+  static char str[17];
+  /*
+   * Work on the unsigned representation: right-shifting a negative
+   * value is implementation-defined, and %x expects an unsigned int.
+   */
+  const uint32_t v = (uint32_t)n;
+
+  snprintf(str, sizeof str, "{0x%02x,0x%02x,0x%02x}",
+           (unsigned int)(v & 0xff),
+           (unsigned int)((v >> 8) & 0xff),
+           (unsigned int)((v >> 16) & 0xff));
+  return str;
+}
+
+/*
+ * proparrayindex[c] holds, for every code point c, the index value of the
+ * last entry in gen/proparrayindex whose code point is <= c, or -1 for
+ * code points below the first entry.
+ */
+static int proparrayindex[UCS_CNT];
+
+/*
+ * Read gen/proparrayindex (lines of "<hex code point> <decimal index>")
+ * and expand it into proparrayindex[].  Each entry applies from its code
+ * point up to, but not including, the code point of the next entry; the
+ * last entry applies through the end of the code space.
+ *
+ * Dies if the file cannot be opened or names a code point beyond the
+ * table, which would otherwise be written outside the array.
+ */
+static void read_proparrayindex(void)
+{
+  FILE *f = fopen("gen/proparrayindex", "rt");
+  unsigned int curr;            /* %x requires an unsigned int */
+  int last = 0, prev_ix = -1;
+  int i, ix;
+
+  if ( !f )
+    die("could not open gen/proparrayindex");
+
+  while ( fscanf(f, "%x %d\n", &curr, &ix) == 2 ) {
+    /* curr == UCS_CNT is harmless: it only fills up to the last slot */
+    if ( curr > UCS_CNT )
+      die("code point out of range in gen/proparrayindex");
+
+    /* Everything since the previous entry belongs to the previous index */
+    for ( i = last ; i < (int)curr ; i++ )
+      proparrayindex[i] = prev_ix;
+    last = (int)curr;
+    prev_ix = ix;
+  }
+
+  /* The final entry extends to the end of the code space */
+  for ( i = last ; i < UCS_CNT ; i++ )
+    proparrayindex[i] = prev_ix;
+
+  fclose(f);
+}
+
+
+/*
+ * nameslistoffset[c] is the offset listed for code point c in
+ * gen/nameslist.offset, or -1 if the file has no line for c.
+ */
+static int nameslistoffset[UCS_CNT];
+
+/*
+ * Read gen/nameslist.offset (lines of "<hex code point> <decimal offset>")
+ * into nameslistoffset[].
+ *
+ * Dies if the file cannot be opened or names a code point outside the
+ * table, which would otherwise be written outside the array.
+ */
+static void read_nameslistoffset(void)
+{
+  FILE *f = fopen("gen/nameslist.offset", "rt");
+  unsigned int curr;            /* %x requires an unsigned int */
+  int offset;
+  int i;
+
+  if ( !f )
+    die("could not open gen/nameslist.offset");
+
+  /* -1 marks code points that do not appear in the file */
+  for ( i = 0 ; i < UCS_CNT ; i++ )
+    nameslistoffset[i] = -1;
+
+  while ( fscanf(f, "%x %d\n", &curr, &offset) == 2 ) {
+    if ( curr >= UCS_CNT )
+      die("code point out of range in gen/nameslist.offset");
+    nameslistoffset[curr] = offset;
+  }
+
+  fclose(f);
+}
+
+
+/* Inverse of the ucstoname hash: hash value -> code point, -1 if unused */
+int32_t hash_to_ucs[PHASHNKEYS];
+
+/*
+ * Fill hash_to_ucs[] by hashing every code point that has an entry in
+ * nameslistoffset[].  Dies if a hash value falls outside the table or if
+ * two code points hash to the same slot.
+ */
+static void compute_hash_to_ucs(void)
+{
+  int slot, ucs;
+
+  /* Start with every slot marked unused */
+  for ( slot = 0 ; slot < PHASHNKEYS ; slot++ )
+    hash_to_ucs[slot] = -1;
+
+  for ( ucs = 0 ; ucs < UCS_CNT ; ucs++ ) {
+    uint32_t h;
+
+    /* Code points without a names list entry are not hash keys */
+    if ( nameslistoffset[ucs] == -1 )
+      continue;
+
+    h = _libucd_ucstoname_hash(ucs);
+
+    if ( h >= PHASHNKEYS )
+      die("hash not minimal");
+
+    if ( hash_to_ucs[h] != -1 )
+      die("hash collision");
+
+    hash_to_ucs[h] = ucs;
+  }
+}
+
+
+/*
+ * Write gen/ucstoname_tab.c: the _libucd_ucstoname_tab[] array with one
+ * entry per hash value, holding the code point, its names list offset and
+ * its property array index.
+ *
+ * Dies if the file cannot be created or written, or if a hash slot has no
+ * code point assigned (the slot value would otherwise be used as an
+ * out-of-range array index).  A partially written file is removed before
+ * dying so that a later make run does not mistake it for a finished one.
+ */
+static void make_ucstoname_tab(void)
+{
+  FILE *f = fopen("gen/ucstoname_tab.c", "wt");
+  int i;
+  int write_failed;
+  int32_t ucs;
+
+  if ( !f )
+    die("could not create gen/ucstoname_tab.c");
+
+  fprintf(f,
+          "#include \"libucd_int.h\"\n"
+          "const struct _libucd_ucstoname_tab _libucd_ucstoname_tab[] =\n"
+          "{\n");
+
+  for ( i = 0 ; i < PHASHNKEYS ; i++ ) {
+    ucs = hash_to_ucs[i];
+    if ( ucs < 0 || ucs >= UCS_CNT ) {
+      fclose(f);
+      remove("gen/ucstoname_tab.c");
+      die("unused slot in ucstoname hash table");
+    }
+
+    /*
+     * int24str() returns a static buffer, so each result has to be
+     * printed before the next call.
+     */
+    fprintf(f, "\t{ %s, ", int24str(ucs));
+    fprintf(f, "%s, ", int24str(nameslistoffset[ucs]));
+    fprintf(f, "%d },\n", proparrayindex[ucs]);
+  }
+  fprintf(f, "};\n");
+
+  /* fclose() flushes, so a failure there is a write failure too */
+  write_failed = ferror(f);
+  if ( fclose(f) )
+    write_failed = 1;
+  if ( write_failed ) {
+    remove("gen/ucstoname_tab.c");
+    die("error writing gen/ucstoname_tab.c");
+  }
+}
+
+
+/*
+ * Build gen/ucstoname_tab.c from gen/proparrayindex, gen/nameslist.offset
+ * and the generated ucstoname hash function.  Takes no options; all paths
+ * are relative to the current directory.
+ */
+int main(int argc, char *argv[])
+{
+  /* die() prefixes its messages with the program name */
+  program = (argc > 0 && argv[0]) ? argv[0] : "mk_nametoucs_tab";
+
+  /* The order matters: the hash inversion needs nameslistoffset[] and
+     the table writer needs all three arrays. */
+  read_proparrayindex();
+  read_nameslistoffset();
+  compute_hash_to_ucs();
+  make_ucstoname_tab();
+
+  return 0;
+}
+