aboutsummaryrefslogtreecommitdiffstats
path: root/makeenums.pl
blob: 24fcd41d46e58884120a3246edbf733a9152cb6c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/perl
#
# Process enum.list and output ucd.h as well as
# C code indices for each enum.
#

#
# close_frag($longname)
#
# Finish the enum that is currently being emitted:
#
#  * on UCD_H, close the open "enum unicode_<name> {" declaration and
#    add the prototype of the matching name-lookup function;
#  * on FRAG, close the enum_names[] initializer, append the C
#    definition of unicode_property_names_<name>(), and close the file.
#
# $longname is the enum's long name as read from enum.list; it is
# lowercased to form the C identifiers.  Both UCD_H and FRAG must
# already be open for writing.
#
# Dies if FRAG cannot be closed cleanly, since buffered write errors
# only surface at close() and would otherwise leave a truncated
# fragment behind unnoticed.
#
sub close_frag($) {
    my($longname) = @_;
    my $lcname = lc($longname);

    # Public header: end of the enum plus the lookup prototype
    print UCD_H "};\n\n";
    print UCD_H "int unicode_property_names_${lcname}(enum unicode_${lcname}, const char **, const char **);\n\n";

    # Generated C file: end of the name table plus the lookup function.
    # The function reports 1 (and NULL names) for out-of-range values
    # and 0 otherwise; either output pointer may be NULL.
    print FRAG "};\n";
    print FRAG <<EOF;

int
unicode_property_names_${lcname}(enum unicode_${lcname} v,
				       const char **longname,
				       const char **shortname)
{
    const char *ln, *sn;
    int rv;

    if ( (unsigned)v >= sizeof enum_names/sizeof(struct libucd_enum_names) ) {
	ln = NULL;
	sn = NULL;
	rv = 1;
    } else {
	ln = enum_names[v].long_name;
	sn = enum_names[v].short_name;
	rv = 0;
    }
    if ( longname )  *longname  = ln;
    if ( shortname ) *shortname = sn;
    return rv;
}
EOF
    close(FRAG)
	or die "$0: error closing fragment for enum ${longname}: $!\n";
}

#
# Main program.
#
# Inputs (current directory):
#   enum.list - the enum definitions
#   ucd.h.in  - template for ucd.h; the line containing "ENUMS;" is
#               replaced by the generated enum declarations
# Outputs:
#   ucd.h         - the template with the enums spliced in
#   enums/<Long>.c - one C fragment per enum with its name table
#
# enum.list line types, as recognized below:
#   (blank) or "## ..."                skipped
#   "# LongName (short) long|short"    starts a new enum; the last word
#                                      picks which value name is the
#                                      preferred C identifier
#   "short ; [number ;] sname ; lname" one value of the current enum;
#                                      a trailing "# ..." comment is
#                                      stripped
# Anything else is a fatal error.
#

open(LIST,   '<', "enum.list") or die "$0: cannot open enum.list: $!\n";
open(UCD_IN, '<', "ucd.h.in")  or die "$0: cannot open ucd.h.in: $!\n";
open(UCD_H,  '>', "ucd.h")     or die "$0: cannot create ucd.h: $!\n";

# The directory normally survives from a previous run, so a failing
# mkdir is only an error if the directory really is not there.
if ( !mkdir("enums", 0777) ) {
    my $err = $!;
    -d "enums" or die "$0: cannot create directory enums: $err\n";
}

# Copy the template up to (but not including) the ENUMS; marker line
my $line;
while ( defined($line = <UCD_IN>) ) {
    last if ( $line =~ /ENUMS\;/ );
    print UCD_H $line;
}

# State of the enum currently being emitted; all undefined until the
# first header line has been seen.
my($longname, $shortname, $whichname, $seqpos);

while ( defined($line = <LIST>) ) {
    chomp $line;
    next if ( $line =~ /^\s*(\#\#.*|)$/ );

    if ( $line =~ /^\#\s*(\S+)\s+\((\S+)\)\s+(long|short)\s*$/ ) {
        # Header line: finish the previous enum, if any, and start a new one
        my $prev = $longname;

        $longname  = $1;
        $shortname = $2;
        $whichname = ($3 eq 'long') ? 1 : 0;

        close_frag($prev) if ( defined($prev) );

        print UCD_H "enum unicode_\L${longname}\E {\n";
        open(FRAG, '>', "enums/${longname}.c")
            or die "$0: cannot create enums/${longname}.c: $!\n";
        print FRAG "#include \"libucd_int.h\"\n";
        print FRAG "static const struct libucd_enum_names enum_names[] = {\n";

        $seqpos = 0;
    } elsif ( $line =~ /\;/ ) {
        $line =~ s/\s*\#.*$//;  # Remove comments
        my @list = split(/\s*;\s*/, $line);

        # The first field must name the enum we are in.  A value line
        # ahead of any header has no enum to belong to and is rejected
        # as well, rather than being written to an unopened fragment.
        if ( !defined($shortname) || shift(@list) ne $shortname ) {
            die "$0: Unexpected line: $line\n";
        }

        # Optional explicit numeric value
        my $epos;
        if ( $list[0] =~ /^[0-9]+$/ ) {
            $epos = shift(@list);
        }

        # Write ucd.h

        # $na is the preferred name, $nx the alternate
        (my $na = $list[$whichname]) =~ tr/-/_/;
        (my $nx = $list[1-$whichname]) =~ tr/-/_/;

        # If the Unicode Consortium defines a numeric value,
        # use that, otherwise use the sequential order in enum.list.
        # For that reason, enum.list should be maintained manually
        # and entries may only be added to the end of lists.

        $epos = $seqpos unless ( defined($epos) );
        $seqpos++;

        printf UCD_H "  %s = %d,\n",
            "UC_\U${shortname}\E_${na}", $epos;
        if ($nx ne 'n/a' && $nx ne $na && $nx) {
            # The alternate name becomes an alias for the same value
            printf UCD_H "  %s = %d,\n",
                "UC_\U${shortname}\E_${nx}", $epos;
        }

        # Write generator fragment

        (my $ln = $list[1]) =~ tr/_/ /;
        my $sn = $list[0];
        $sn = ($sn eq 'n/a') ? '0' : "\"$sn\"";

        # $epos is always defined at this point, so every entry is
        # written with an explicit array index.
        printf FRAG "\t[%3d] = { \"%s\", %s },\n", $epos, $ln, $sn;
    } else {
        die "$0: Cannot parse: $line\n";
    }
}

# Finish the last enum; with no header in enum.list there is nothing
# open to finish.
close_frag($longname) if ( defined($longname) );

# Copy the remainder of the template after the ENUMS; marker
while ( defined($line = <UCD_IN>) ) {
    print UCD_H $line;
}

close(LIST);
close(UCD_IN);
# Buffered write errors only show up here
close(UCD_H) or die "$0: error writing ucd.h: $!\n";