+ equivalent characters tool and some tweaks

This commit is contained in:
Vovanium 2023-10-07 22:14:39 +03:00
parent 18ae0ac804
commit 2d05455aca
6 changed files with 288 additions and 78 deletions

View File

@ -723,10 +723,10 @@ DWIDTH 4 0
BBX 4 8 0 -1
BITMAP
00
60
50
50
50
70
70
50
50
00
@ -2642,13 +2642,13 @@ SWIDTH 500 0
DWIDTH 4 0
BBX 4 8 0 -1
BITMAP
00
50
00
60
20
20
20
20
00
ENDCHAR
STARTCHAR LATIN SMALL LETTER ETH
@ -4925,7 +4925,7 @@ BITMAP
00
50
50
70
50
70
70
50
@ -4940,7 +4940,7 @@ BITMAP
00
00
50
70
50
70
70
50
@ -5021,7 +5021,7 @@ BITMAP
20
40
ENDCHAR
STARTCHAR LATIN CAPITAL LETTER J
STARTCHAR GREEK CAPITAL LETTER YOT
ENCODING 895
SWIDTH 500 0
DWIDTH 4 0
@ -5389,9 +5389,9 @@ BBX 4 8 0 -1
BITMAP
00
50
70
70
50
70
70
50
50
00
@ -6753,12 +6753,12 @@ DWIDTH 4 0
BBX 4 8 0 -1
BITMAP
00
20
50
30
40
40
50
20
40
40
30
00
ENDCHAR
STARTCHAR GREEK CAPITAL LETTER SAN
@ -7056,9 +7056,9 @@ BITMAP
20
50
50
70
70
50
50
30
00
ENDCHAR
STARTCHAR CYRILLIC CAPITAL LETTER SHORT U
@ -7221,9 +7221,9 @@ BITMAP
50
50
50
70
70
50
50
30
00
ENDCHAR
STARTCHAR CYRILLIC CAPITAL LETTER SHORT I
@ -7236,9 +7236,9 @@ BITMAP
20
50
50
70
70
50
50
30
00
ENDCHAR
STARTCHAR CYRILLIC CAPITAL LETTER KA
@ -7353,12 +7353,12 @@ DWIDTH 4 0
BBX 4 8 0 -1
BITMAP
00
20
50
30
40
40
50
20
40
40
30
00
ENDCHAR
STARTCHAR CYRILLIC CAPITAL LETTER TE
@ -8059,8 +8059,8 @@ BBX 4 8 0 -1
BITMAP
40
20
00
20
50
70
40
30
@ -10076,7 +10076,7 @@ BITMAP
70
70
ENDCHAR
STARTCHAR char8341
STARTCHAR LATIN SUBSCRIPT SMALL LETTER H
ENCODING 8341
SWIDTH 500 0
DWIDTH 4 0
@ -10091,7 +10091,7 @@ BITMAP
50
50
ENDCHAR
STARTCHAR char8342
STARTCHAR LATIN SUBSCRIPT SMALL LETTER K
ENCODING 8342
SWIDTH 500 0
DWIDTH 4 0
@ -10106,7 +10106,7 @@ BITMAP
50
50
ENDCHAR
STARTCHAR char8343
STARTCHAR LATIN SUBSCRIPT SMALL LETTER L
ENCODING 8343
SWIDTH 500 0
DWIDTH 4 0
@ -10121,7 +10121,7 @@ BITMAP
20
10
ENDCHAR
STARTCHAR char8344
STARTCHAR LATIN SUBSCRIPT SMALL LETTER M
ENCODING 8344
SWIDTH 500 0
DWIDTH 4 0
@ -10136,7 +10136,7 @@ BITMAP
70
70
ENDCHAR
STARTCHAR char8345
STARTCHAR LATIN SUBSCRIPT SMALL LETTER N
ENCODING 8345
SWIDTH 500 0
DWIDTH 4 0
@ -10151,7 +10151,7 @@ BITMAP
50
50
ENDCHAR
STARTCHAR char8346
STARTCHAR LATIN SUBSCRIPT SMALL LETTER P
ENCODING 8346
SWIDTH 500 0
DWIDTH 4 0
@ -10166,7 +10166,7 @@ BITMAP
70
40
ENDCHAR
STARTCHAR char8347
STARTCHAR LATIN SUBSCRIPT SMALL LETTER S
ENCODING 8347
SWIDTH 500 0
DWIDTH 4 0
@ -10181,7 +10181,7 @@ BITMAP
30
70
ENDCHAR
STARTCHAR char8348
STARTCHAR LATIN SUBSCRIPT SMALL LETTER T
ENCODING 8348
SWIDTH 500 0
DWIDTH 4 0
@ -10271,7 +10271,7 @@ BITMAP
30
00
ENDCHAR
STARTCHAR char8381
STARTCHAR RUBLE SIGN
ENCODING 8381
SWIDTH 500 0
DWIDTH 4 0

View File

@ -3201,7 +3201,7 @@ BITMAP
18
7E
40
78
7C
40
7E
00
@ -5186,7 +5186,7 @@ BITMAP
08
10
ENDCHAR
STARTCHAR LATIN CAPITAL LETTER J
STARTCHAR GREEK CAPITAL LETTER YOT
ENCODING 895
SWIDTH 1000 0
DWIDTH 8 0
@ -7819,11 +7819,11 @@ BBX 8 8 0 -1
BITMAP
00
00
1C
22
3E
20
1C
38
44
7C
40
38
00
ENDCHAR
STARTCHAR CYRILLIC SMALL LETTER ZHE
@ -11561,7 +11561,7 @@ BITMAP
3C
18
ENDCHAR
STARTCHAR char8341
STARTCHAR LATIN SUBSCRIPT SMALL LETTER H
ENCODING 8341
SWIDTH 1000 0
DWIDTH 8 0
@ -11576,7 +11576,7 @@ BITMAP
24
24
ENDCHAR
STARTCHAR char8342
STARTCHAR LATIN SUBSCRIPT SMALL LETTER K
ENCODING 8342
SWIDTH 1000 0
DWIDTH 8 0
@ -11591,7 +11591,7 @@ BITMAP
28
24
ENDCHAR
STARTCHAR char8343
STARTCHAR LATIN SUBSCRIPT SMALL LETTER L
ENCODING 8343
SWIDTH 1000 0
DWIDTH 8 0
@ -11606,7 +11606,7 @@ BITMAP
10
0C
ENDCHAR
STARTCHAR char8344
STARTCHAR LATIN SUBSCRIPT SMALL LETTER M
ENCODING 8344
SWIDTH 1000 0
DWIDTH 8 0
@ -11621,7 +11621,7 @@ BITMAP
2A
2A
ENDCHAR
STARTCHAR char8345
STARTCHAR LATIN SUBSCRIPT SMALL LETTER N
ENCODING 8345
SWIDTH 1000 0
DWIDTH 8 0
@ -11636,7 +11636,7 @@ BITMAP
24
24
ENDCHAR
STARTCHAR char8346
STARTCHAR LATIN SUBSCRIPT SMALL LETTER P
ENCODING 8346
SWIDTH 1000 0
DWIDTH 8 0
@ -11651,7 +11651,7 @@ BITMAP
38
20
ENDCHAR
STARTCHAR char8347
STARTCHAR LATIN SUBSCRIPT SMALL LETTER S
ENCODING 8347
SWIDTH 1000 0
DWIDTH 8 0
@ -11666,7 +11666,7 @@ BITMAP
0C
38
ENDCHAR
STARTCHAR char8348
STARTCHAR LATIN SUBSCRIPT SMALL LETTER T
ENCODING 8348
SWIDTH 1000 0
DWIDTH 8 0
@ -12056,7 +12056,7 @@ FE
10
00
ENDCHAR
STARTCHAR char8377
STARTCHAR INDIAN RUPEE SIGN
ENCODING 8377
SWIDTH 1000 0
DWIDTH 8 0
@ -12071,7 +12071,7 @@ BITMAP
04
00
ENDCHAR
STARTCHAR char8378
STARTCHAR TURKISH LIRA SIGN
ENCODING 8378
SWIDTH 1000 0
DWIDTH 8 0
@ -12086,7 +12086,7 @@ BITMAP
5C
00
ENDCHAR
STARTCHAR char8379
STARTCHAR NORDIC MARK SIGN
ENCODING 8379
SWIDTH 1000 0
DWIDTH 8 0
@ -12101,7 +12101,7 @@ BITMAP
1C
00
ENDCHAR
STARTCHAR char8380
STARTCHAR MANAT SIGN
ENCODING 8380
SWIDTH 1000 0
DWIDTH 8 0
@ -12116,7 +12116,7 @@ BITMAP
82
00
ENDCHAR
STARTCHAR char8381
STARTCHAR RUBLE SIGN
ENCODING 8381
SWIDTH 1000 0
DWIDTH 8 0
@ -12131,7 +12131,7 @@ BITMAP
20
00
ENDCHAR
STARTCHAR char8382
STARTCHAR LARI SIGN
ENCODING 8382
SWIDTH 1000 0
DWIDTH 8 0
@ -12146,7 +12146,7 @@ BITMAP
7E
00
ENDCHAR
STARTCHAR char8383
STARTCHAR BITCOIN SIGN
ENCODING 8383
SWIDTH 1000 0
DWIDTH 8 0

View File

@ -1777,7 +1777,7 @@ BITMAP
66
66
66
66
6E
7B
60
ENDCHAR
@ -2882,7 +2882,7 @@ SWIDTH 1000 0
DWIDTH 8 0
BBX 8 8 0 -1
BITMAP
24
36
00
66
66
@ -3201,7 +3201,7 @@ BITMAP
3C
FE
C0
F8
FC
C0
FE
00
@ -6833,7 +6833,7 @@ BITMAP
0C
0C
0C
4C
6C
38
ENDCHAR
STARTCHAR GREEK CAPITAL THETA SYMBOL

View File

@ -25,7 +25,7 @@ _GBDFED_INFO "Edited with gbdfed 1.6."
RELATIVE_SETWIDTH 70
RELATIVE_WEIGHT 50
ENDPROPERTIES
CHARS 2491
CHARS 2492
STARTCHAR SPACE
ENCODING 32
SWIDTH 750 0
@ -3198,7 +3198,7 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
28
38
10
7C
40
78
@ -3213,7 +3213,7 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
28
38
10
38
44
7C
@ -3558,7 +3558,7 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
28
38
10
38
10
10
@ -3573,7 +3573,7 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
28
38
10
00
70
10
@ -4068,7 +4068,7 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
28
38
10
38
44
44
@ -4083,7 +4083,7 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
28
38
10
00
38
44
@ -4908,12 +4908,12 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
00
30
48
38
44
04
04
48
30
44
38
00
ENDCHAR
STARTCHAR LATIN CAPITAL LETTER C WITH HOOK
@ -4923,12 +4923,12 @@ DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
00
14
28
34
48
40
40
24
18
44
38
00
ENDCHAR
STARTCHAR LATIN SMALL LETTER C WITH HOOK
@ -17471,17 +17471,17 @@ BITMAP
10
00
ENDCHAR
STARTCHAR KELVIN SIGN
STARTCHAR LATIN CAPITAL LETTER K
ENCODING 8490
SWIDTH 750 0
DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
00
4C
50
60
50
44
48
70
48
48
44
00
@ -36402,6 +36402,21 @@ FC
FC
ENDCHAR
STARTCHAR char60304
ENCODING 60307
SWIDTH 750 0
DWIDTH 6 0
BBX 6 8 0 -1
BITMAP
F4
E8
F4
E8
F4
E8
F4
E8
ENDCHAR
STARTCHAR char60304
ENCODING 60308
SWIDTH 750 0
DWIDTH 6 0

190
tools/make-equivalents.awk Executable file
View File

@ -0,0 +1,190 @@
#!/usr/bin/awk -f
# Creates missing graphically equivalent characters and checks existing ones
# (e.g. Latin capital A, Greek capital Alpha and Cyrillic capital A).
# Debug check: when `expression` is false, report `message` on stderr
# and terminate the program with exit status 1. Does nothing otherwise.
function assert(expression, message) {
	if (expression)
		return;
	print "DEBUG ASSERTION FAILED "message > "/dev/stderr";
	exit 1;
}
# Parse a string of hexadecimal digits (either letter case) into a number.
#   x - digit string; an empty string yields 0
# Any character outside 0-9 / A-F trips assert().
# i, y, d are locals.
function hex(x, i, y, d) {
	y = 0;
	i = 0;
	while (++i <= length(x)) {
		# index() is 1-based and returns 0 when the digit is not found
		d = index("0123456789ABCDEF", toupper(substr(x, i, 1))) - 1;
		assert(d >= 0 && d < 16, "Hexadecimal character required");
		y = 16 * y + d;
	}
	return y;
}
# Format a non-negative integer as upper-case hexadecimal.
#   x - value to convert (any fractional part is discarded)
#   p - minimum number of digits; omitted or <= 0 means 1
# Returns the digit string, zero-padded on the left to at least p digits.
# s, j are locals.
function to_hex(x, p, s, j) {
	if (p <= 0) { p = 1; }
	x = int(x);
	s = "";
	# "!=" is the inequality test; "/=" would be awk's divide-and-assign
	for (j = 0; j < p || x != 0; j = j + 1) {
		# substr() positions start at 1, so digit value 0 lives at index 1
		s = substr("0123456789ABCDEF", x % 16 + 1, 1) s;
		# integer division, otherwise x never reaches exactly 0
		x = int(x / 16);
	}
	return s;
}
# Register one group of graphically equivalent characters.
#   s - space-separated hexadecimal code points; the first one becomes the
#       reference that every member of the group (itself included) maps to
# Fills the global tables CH_COPY and CH_EQU, keyed by numeric code point.
# A, oc, j are locals.
function make_eq (s, A, oc, j) {
	split(s, A, " ");
	# turn every hex string into its numeric code point
	for (j in A)
		A[j] = hex(A[j]);
	oc = A[1];
	j = 1;
	while (j <= length(A)) {
		CH_EQU[A[j]] = oc;
		CH_COPY[A[j]] = oc;
		j++;
	}
}
# Equivalence table, built once before any input is read.
# Each make_eq() call lists the code points of one group in hexadecimal,
# separated by spaces; the first code point is the reference of the group.
# The trailing comment on each row shows the glyph the group stands for.
BEGIN {
make_eq("0020 00A0"); # space
make_eq("0041 0391 0410"); # A
make_eq("0042 0392 0412"); # B
make_eq("0043 03F9 0421"); # C
make_eq("0045 0395 0415"); # E
make_eq("0046 03DC"); # F
make_eq("0048 0397 041D"); # H
make_eq("0049 0399 0406"); # I
make_eq("004A 037F 0408"); # J
make_eq("004B 039A 041A 212A"); # K
make_eq("004D 039C 041C"); # M
make_eq("004E 039D"); # N
make_eq("004F 039F 041E"); # O
make_eq("0050 03A1 0420"); # P
make_eq("0051 051A") # Q
make_eq("0053 0405"); # S
make_eq("0054 03A4 0422"); # T
make_eq("0057 051C"); # W
make_eq("0058 03A7 0425"); # X
make_eq("0059 04AE"); # Y
make_eq("005A 0396"); # Z
make_eq("0061 0430"); # a
make_eq("0063 03F2 0441"); # c
make_eq("0065 0435"); # e
make_eq("0068 04BB"); # h
make_eq("0069 0456"); # i
make_eq("006A 03F3 0458"); # j
make_eq("006F 03BF 043E"); # o
make_eq("0070 0440"); # p
make_eq("0071 051B"); # q
make_eq("0073 0455"); # s
make_eq("0077 051D"); # w
make_eq("0078 0445"); # x
make_eq("0079 0443"); # y
make_eq("00B5 03BC"); # μ (micro)
make_eq("00C4 04D2"); # Ä
make_eq("00C5 212B"); # Å (angstrom)
make_eq("00C6 04D4"); # Æ
make_eq("00C8 0400"); # È
make_eq("00CB 0401"); # Ë
make_eq("00CF 0407"); # Ï
make_eq("00D0 0110"); # Ð (eth)
make_eq("00D6 04E6"); # Ö
make_eq("00DE 03F7"); # Þ
make_eq("00E4 04D3"); # ä
make_eq("00E6 04D5"); # æ
make_eq("00E8 0450"); # è
make_eq("00EB 0451"); # ë
make_eq("00EF 0457"); # ï
make_eq("00F6 04E7"); # ö
make_eq("00FE 03F8"); # þ
make_eq("00FF 04F1"); # ÿ
make_eq("0102 04D0"); make_eq("0103 04D1"); # Ă ă
make_eq("0114 04D6"); make_eq("0115 04D7"); # Ĕ ĕ
make_eq("0186 03FD"); # Ɔ
make_eq("018F 04D8"); # Ə
make_eq("0190 0510"); # Ɛ
make_eq("01DD 0259 04D9"); # ǝ
make_eq("0233 04EF"); # ȳ
make_eq("0251 03B1"); # ɑ
make_eq("0254 037B"); # ɔ
make_eq("025B 03B5 0511"); # ɛ
make_eq("025C 0437"); # ɜ
make_eq("0269 03B9 2373 A647"); # ɩ
make_eq("0275 04E9"); # ɵ
make_eq("0299 0432"); # ʙ
make_eq("029C 043D"); # ʜ
make_eq("0376 0418"); make_eq("0377 0438"); # Ͷ ͷ
make_eq("0393 0413"); # Γ
make_eq("0394 2206"); # Δ
make_eq("03A0 041F"); # Π
make_eq("03A6 0424"); # Φ
make_eq("03A9 2126"); # Ω
make_eq("040C 1E30"); # Ќ
}
# A font starts: drop every glyph body collected so far and reset the
# last seen encoding to "none yet".
$1=="STARTFONT" {
	split("", CHAR_REF);	# split() of an empty string empties the array
	split("", CHAR);
	enc = -1;
}
# Fill a gap in the font: for every code point in [first, last] that is
# listed in CH_COPY and whose reference glyph body is stored in CHAR,
# write a copy of that glyph to standard output.
#   first, last - inclusive range of encodings to consider
# The "copied" log line goes to standard error and ends with a newline, so
# it never becomes part of the generated font on standard output.
# j, c are locals.
function empty_range(first, last, j, c) {
	for (j = first; j <= last; j = j + 1) {
		if (!(j in CH_COPY))
			continue;
		c = CH_COPY[j];
		if (c in CHAR) {
			# insert a character
			print "STARTCHAR", "char" j;
			print "ENCODING", j;
			printf("%s", CHAR[c]);
			printf("%.40s: copied %04X to %04X\n", FILENAME, CHAR_REF[c], j) >> "/dev/stderr";
		}
	}
}
# STARTCHAR opens a glyph: begin a fresh body and hold the line back.
# It is printed by the ENCODING rule, after any inserted glyphs.
$1=="STARTCHAR" {
	startchar_line = $0;
	body = "";
	next;
}
# Metric lines are part of the glyph body.
$1 ~ /^(SWIDTH|DWIDTH|BBX)$/ {
	body = body $0 "\n";
}
# So is everything from BITMAP through ENDCHAR, both lines included.
$1=="BITMAP", $1=="ENDCHAR" {
	body = body $0 "\n";
}
# ENCODING: first emit copies for the code points skipped since the
# previous glyph, then release the delayed STARTCHAR line.
$1=="ENCODING" {
	empty_range(enc + 1, $2 - 1);
	print startchar_line;
	enc = $2;
}
# Remember the current glyph as the stored body of its reference character.
#   CH_X - table mapping an encoding to its reference code point
# Reads the globals enc and body. The first glyph stored for a reference
# wins: later calls for the same reference leave CHAR / CHAR_REF untouched.
function add_char(CH_X) {
	if (!(enc in CH_X))
		return;
	if (CH_X[enc] in CHAR)
		return;
	CHAR_REF[CH_X[enc]] = enc;
	CHAR[CH_X[enc]] = body;
}
# ENDCHAR: the body is complete. Store it if it is the first glyph seen for
# its reference, then report on stderr when it differs from the stored one.
$1=="ENDCHAR" {
	add_char(CH_EQU);
	add_char(CH_COPY);
	if (enc in CH_EQU) {
		orig = CH_EQU[enc];
		if (CHAR[orig] != body)
			printf("%.40s: %04X /= %04X\n", FILENAME, enc, CHAR_REF[orig]) >> "/dev/stderr";
	}
}
# ENDFONT: emit copies for every code point after the last glyph,
# up to 10FFFF.
$1=="ENDFONT" {
	empty_range(enc + 1, hex("10FFFF"));
}
# Pass every line through unless an earlier rule ended with 'next'.
{
	print $0;
}

5
tools/update-chars.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/sh
# Rewrite the CHARS line of a font file so that it equals the number of
# lines starting with STARTCHAR.
# Usage: update-chars.sh FILE
# The previous content of FILE is kept as FILE.1orig.
if [ $# -ne 1 ] || [ ! -f "$1" ]; then
	echo "usage: $0 FILE" >&2
	exit 2
fi
# grep -c prints a bare number, so no padding from wc can leak into awk
N=`grep -c '^STARTCHAR' "$1"`
#echo $N
# stop here if the backup could not be made; otherwise awk would read a
# missing file and truncate the original
mv "$1" "$1.1orig" || exit 1
# -v hands the count over as a variable instead of splicing it into the program
awk -v n="$N" '$1=="CHARS" {$2=n}; {print}' "$1.1orig" >"$1"