Skip to content

Commit cef0459

Browse files
committed
mktables: Handle Unicode 16.0 Unikemet.txt file
This includes several new properties, some of which are considered "provisional" by Unicode, which means they can be heavily revised or withdrawn. These properties are designed for use by scholars of hieroglyphics.
1 parent 11ab430 commit cef0459

File tree

6 files changed

+43
-5
lines changed

6 files changed

+43
-5
lines changed

charclass_invlists.inc

+1-1
Original file line numberDiff line numberDiff line change
@@ -436055,7 +436055,7 @@ static const U8 WB_table[23][23] = {
436055436055
* 3f4f32ed2a577344a508114527e721d7a8b633d32f38945d47fe0c743650c585 lib/unicore/extracted/DLineBreak.txt
436056436056
* 710abf2d581ac9c57f244c0834f9d9969d9781e0396adccd330eaae658ac7d6b lib/unicore/extracted/DNumType.txt
436057436057
* 6bd30f385f3baf3ab5d5308c111a81de87bea5f494ba0ba69e8ab45263b8c34d lib/unicore/extracted/DNumValues.txt
436058-
* d13b7019817a693e50c3b233b25efd73390334c226f700650628734388c34209 lib/unicore/mktables
436058+
* 122e104ecbf3408f4d18a5f4e4b9ce12737f421d0b31581835459e38d9c93ca6 lib/unicore/mktables
436059436059
* 55d90fdc3f902e5c0b16b3378f9eaa36e970a1c09723c33de7d47d0370044012 lib/unicore/version
436060436060
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
436061436061
* c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl

lib/unicore/mktables

+38
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,15 @@ push @tables_that_may_be_empty, 'Grapheme_Cluster_Break=Prepend'
871871
push @tables_that_may_be_empty, 'Canonical_Combining_Class=CCC133'
872872
if $v_version ge v6.2.0;
873873

874+
# Perl does not handle these Egyptian hieroglyph properties, whose intended
# audience is only specialist Egyptologists.  The entries are added only when
# $v_version is at least 16.0.0.
# NOTE(review): 'kEH_NoMirror=No' is listed but 'kEH_NoRotate=No' is not --
# confirm that the asymmetry is intentional.
if ($v_version ge v16.0.0) {
    push @tables_that_may_be_empty,
         qw(
            kEH_Cat
            kEH_Desc
            kEH_HG
            kEH_IFAO
            kEH_JSesh
            kEH_NoMirror
            kEH_NoMirror=Yes
            kEH_NoMirror=No
            kEH_NoRotate
            kEH_NoRotate=Yes
         );
}
882+
874883
# The lists below are hashes, so the key is the item in the list, and the
875884
# value is the reason why it is in the list. This makes generation of
876885
# documentation easier.
@@ -13377,6 +13386,30 @@ sub filter_early_version_name_alias_line {
1337713386
return;
1337813387
}
1337913388

13389+
sub setup_Unikemet {

    # Creates the kEH_Core, kEH_FVal, kEH_Func, and kEH_UniK properties.
    # These are provisional properties, so aren't in PropAliases.txt, and
    # hence have to be constructed by hand here.  Each gets an empty-string
    # default map.

    # NOTE(review): the type recorded against each name below is never
    # consulted; every property is created with Type => $STRING.  Confirm
    # whether the $ENUM entries are meant to take effect.
    my %type_of = (
                    Core => $ENUM,
                    FVal => $STRING,
                    Func => $STRING,
                    UniK => $ENUM,
                  );

    foreach my $suffix (keys %type_of) {
        my $name = "kEH_$suffix";
        Property->new($name, Default_Map => "", Type => $STRING);
    }
}
13405+
13406+
sub filter_Unikemet_line {

    # Rewrites the input line held in $_ in place: the tab-separated fields
    # become '; '-separated, and a leading 'U+' is removed.  Nothing is
    # returned; the caller sees the result in $_.

    # mktables can't accept semi-colons inside a field, so any that are
    # already present are turned into commas before the tabs are replaced by
    # the semi-colon separator.
    tr/;/,/;

    s/\t/; /g;
    s/ ^ U\+ //x;

    return;
}
13412+
1338013413
sub filter_all_caps_script_names {
1338113414

1338213415
# Some early Unicode releases had the script names in all CAPS. This
@@ -19809,6 +19842,11 @@ my @input_file_objects = (
1980919842
Skip => $Unused_Skip,
1981019843
UCD => 0,
1981119844
),
19845+
Input_file->new('Unikemet.txt', v16.0.0,
19846+
Pre_Handler => \&setup_Unikemet,
19847+
Each_Line_Handler => \&filter_Unikemet_line,
19848+
UCD => 0,
19849+
),
1981219850
Input_file->new('DoNotEmit.txt', v16.0.0,
1981319851
# Advice about characters that are unwise to create; not
1981419852
# any properties, though we could create some.

lib/unicore/uni_keywords.pl

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regcharclass.h

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regexp_constants.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
* 3f4f32ed2a577344a508114527e721d7a8b633d32f38945d47fe0c743650c585 lib/unicore/extracted/DLineBreak.txt
7979
* 710abf2d581ac9c57f244c0834f9d9969d9781e0396adccd330eaae658ac7d6b lib/unicore/extracted/DNumType.txt
8080
* 6bd30f385f3baf3ab5d5308c111a81de87bea5f494ba0ba69e8ab45263b8c34d lib/unicore/extracted/DNumValues.txt
81-
* d13b7019817a693e50c3b233b25efd73390334c226f700650628734388c34209 lib/unicore/mktables
81+
* 122e104ecbf3408f4d18a5f4e4b9ce12737f421d0b31581835459e38d9c93ca6 lib/unicore/mktables
8282
* 55d90fdc3f902e5c0b16b3378f9eaa36e970a1c09723c33de7d47d0370044012 lib/unicore/version
8383
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
8484
* c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl

uni_keywords.h

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)