2023-10-07 21:14:39 +02:00
|
|
|
|
#!/usr/bin/awk -f
|
|
|
|
|
|
|
|
|
|
# Fills in missing glyphs from graphically equivalent characters (e.g. Latin capital A,
# Greek capital Alpha and Cyrillic capital A) and reports on stderr when glyphs that
# should be equivalent differ.
|
|
|
|
|
|
|
|
|
|
# Abort the script with exit status 1 when `expression` is false,
# after writing `message` (prefixed with a fixed tag) to standard error.
# Does nothing when `expression` is true.
function assert(expression, message) {
    if (expression) {
        return;
    }
    print "DEBUG ASSERTION FAILED "message > "/dev/stderr";
    exit 1;
}
|
|
|
|
|
|
|
|
|
|
# Convert the hexadecimal string `x` (upper or lower case) to a number.
# An empty string yields 0; a character outside 0-9/A-F trips assert().
# pos, value and digit are locals.
function hex(x, pos, value, digit) {
    value = 0;
    pos = 1;
    while (pos <= length(x)) {
        # index() is 1-based: "0" is found at position 1, "F" at position 16,
        # and a character that is not a hex digit gives 0.
        digit = index("0123456789ABCDEF", toupper(substr(x, pos, 1)));
        assert(digit > 0 && digit <= 16, "Hexadecimal character required");
        value = value * 16 + digit - 1;
        pos++;
    }
    return value;
}
|
|
|
|
|
|
|
|
|
|
# Format the non-negative integer `x` as an upper-case hexadecimal string.
#   x  value to format (truncated to an integer; negative values are not supported)
#   p  minimum number of digits, zero-padded on the left (defaults to 1 when <= 0 or omitted)
# s and j are locals. Returns the digit string, e.g. to_hex(65, 4) == "0041".
function to_hex(x, p, s, j) {
    if (p <= 0) { p = 1; }
    x = int(x);
    s = "";
    # Keep emitting digits until both the minimum width is reached and no
    # significant digits remain.
    for (j = 0; j < p || x != 0; j = j + 1) {
        # substr() is 1-based, so the digit with value v sits at position v + 1.
        s = substr("0123456789ABCDEF", x % 16 + 1, 1) s;
        # awk division is floating point; truncate so x eventually reaches 0.
        x = int(x / 16);
    }
    return s;
}
|
|
|
|
|
|
|
|
|
|
# Register the space-separated hexadecimal code points in `s` as one group of
# equivalent characters.
# The canonical code is the value CH_EQU already holds for the first member,
# or the first member itself when it has no entry yet. Every member of the
# group is then mapped to that canonical code in both CH_COPY and CH_EQU.
# codes, canon and k are locals.
function make_eq(s, codes, canon, k) {
    split(s, codes, " ");
    # Convert every member from hex text to a number before any lookup.
    for (k = 1; k <= length(codes); k++) {
        codes[k] = hex(codes[k]);
    }

    canon = (codes[1] in CH_EQU) ? CH_EQU[codes[1]] : codes[1];

    for (k = 1; k <= length(codes); k++) {
        CH_COPY[codes[k]] = canon;
        CH_EQU[codes[k]] = canon;
    }
}
|
|
|
|
|
|
2024-04-06 01:44:39 +02:00
|
|
|
|
# Register the group `s` through make_eq(), but only when the global `slant`
# holds the text "R" including its surrounding double quotes.
# NOTE(review): in the visible file `slant` is assigned only by the SLANT
# input rule, while the visible callers are in BEGIN, which runs before any
# input is read — confirm `slant` is supplied some other way (e.g. -v),
# otherwise calls made from BEGIN never register anything.
function make_eq_roman(s) {
    if (slant != "\"R\"") {
        return;
    }
    make_eq(s);
}
|
|
|
|
|
|
|
|
|
|
# Register the group `s` through make_eq(), but only when the global `slant`
# holds the text "I" including its surrounding double quotes.
# NOTE(review): in the visible file `slant` is assigned only by the SLANT
# input rule, while the visible caller is in BEGIN, which runs before any
# input is read — confirm `slant` is supplied some other way (e.g. -v),
# otherwise calls made from BEGIN never register anything.
function make_eq_italic(s) {
    if (slant != "\"I\"") {
        return;
    }
    make_eq(s);
}
|
|
|
|
|
|
2023-10-07 21:14:39 +02:00
|
|
|
|
# Build the equivalence tables (CH_EQU / CH_COPY) before any input is read.
# Each make_eq() argument is one group of look-alike code points in hex; the
# first member that is not already known becomes the canonical code.
# The make_eq_roman()/make_eq_italic() entries are registered only when the
# global `slant` has the matching value at the time BEGIN runs.
BEGIN {
    # Latin letters and their Greek / Cyrillic / phonetic look-alikes
    make_eq("0020 00A0"); # space
    make_eq("0041 0391 0410"); make_eq("0061 0430"); # A a
    make_eq("0042 0392 0412"); # B
    make_eq("0043 03F9 0421"); make_eq("0063 03F2 0441 1D04"); # C c
    make_eq("0045 0395 0415"); make_eq("0065 0435"); # E e
    make_eq("0046 03DC"); # F
    make_eq("0048 0397 041D"); make_eq("0068 04BB"); # H h
    make_eq("0049 0399 0406"); make_eq("0069 0456"); # I i
    make_eq("004A 037F 0408"); make_eq("006A 03F3 0458"); # J j
    make_eq("004B 039A 041A 212A"); # K
    make_eq("004D 039C 041C"); # M
    make_eq("004E 039D"); # N
    make_eq("004F 039F 041E"); make_eq("006F 03BF 043E 1D0F"); # O o
    make_eq("0050 03A1 0420"); make_eq("0070 0440"); # P p
    # The two calls below must be separate statements; without the ';' they
    # would be parsed as a string concatenation of the two call results.
    make_eq("0051 051A"); make_eq("0071 051B"); # Q q
    make_eq("0053 0405"); make_eq("0073 0455"); # S s
    make_eq("0054 03A4 0422"); # T
    make_eq("0076 1D20"); # v
    make_eq("0057 051C"); make_eq("0077 051D 1D21"); # W w
    make_eq("0058 03A7 0425"); make_eq("0078 0445"); # X x
    make_eq("0059 04AE"); make_eq("0079 0443"); # Y y
    make_eq("005A 0396"); make_eq("007A 1D22"); # Z z

    # Latin-1 and extended Latin
    make_eq("00B5 03BC"); # μ (micro)
    make_eq("00C4 04D2"); make_eq("00E4 04D3"); # Ä ä
    make_eq("00C5 212B"); # Å (angstrom)
    make_eq("00C6 04D4"); make_eq("00E6 04D5"); # Æ æ
    make_eq("00C8 0400"); make_eq("00E8 0450"); # È è
    make_eq("00CB 0401"); make_eq("00EB 0451"); # Ë ë
    make_eq("00CF 0407"); make_eq("00EF 0457"); # Ï ï
    make_eq("00D0 0110"); # Ð (eth)
    make_eq("00D6 04E6"); make_eq("00F6 04E7"); # Ö ö
    make_eq("00DE 03F7"); make_eq("00FE 03F8"); # Þ þ
    make_eq("00FF 04F1"); # ÿ
    make_eq("0102 04D0"); make_eq("0103 04D1"); # Ă ă
    make_eq("0114 04D6"); make_eq("0115 04D7"); # Ĕ ĕ
    make_eq("0138 03BA 043A 1D0B"); # ĸ
    make_eq("0186 03FD"); # Ɔ
    make_eq("018F 04D8"); # Ə
    make_eq("0190 0510"); # Ɛ
    make_eq("01B1 2127"); # Ʊ
    make_eq("01DD 0259 04D9"); # ǝ
    make_eq("0233 04EF"); # ȳ
    make_eq("0251 03B1 237A"); # ɑ
    make_eq("0254 037B 1D10"); # ɔ
    make_eq("025B 03B5 0511"); # ɛ
    make_eq("025C 0437"); # ɜ
    make_eq("0269 03B9 2373 A647"); # ɩ
    make_eq("0275 04E9"); # ɵ
    make_eq("0299 0432"); # ʙ
    make_eq("029C 043D"); # ʜ

    # Greek
    make_eq("0376 0418"); make_eq_roman("0377 0438"); make_eq_italic("0075 0438"); # Ͷ ͷ
    make_eq("0393 0413"); # Γ
    make_eq("0394 2206"); # Δ
    make_eq("03A0 041F"); # Π
    make_eq("03A6 0424"); # Φ
    make_eq("03A9 2126"); # Ω
    make_eq("03C9 0461 2375"); # ω

    # Cyrillic
    make_eq("040C 1E30"); # Ќ
    make_eq_roman("0433 1D26"); # г
    make_eq_roman("0438 1D0E"); # и
    make_eq("043B 1D2B"); # л
    make_eq("043C 1D0D"); # м
    make_eq("043F 1D28"); # п
    make_eq("1D18 1D29"); # р
    make_eq_roman("0442 1D1B"); # т
    make_eq("044F 1D19"); # я
}
|
|
|
|
|
|
|
|
|
|
# A STARTFONT line begins a new font: forget every glyph body collected so far
# and reset the last-seen encoding to -1.
$1 == "STARTFONT" {
    split("", CHAR_REF);    # split("", arr) empties the array
    split("", CHAR);
    enc = -1;
}
|
|
|
|
|
|
2024-04-06 01:44:39 +02:00
|
|
|
|
# Remember the second field of the SLANT line; make_eq_roman() and
# make_eq_italic() compare it against "R" and "I" with their double quotes.
$1 == "SLANT" {
    slant = $2;
}
|
|
|
|
|
|
2023-10-07 21:14:39 +02:00
|
|
|
|
# Walk the code points from `first` to `last` (inclusive). For each one that
# has a CH_COPY entry, either print a copy of the glyph stored for its source
# code (logging the copy on stderr) or, when no such glyph was stored, log on
# stderr which source code is needed.
# code and src are locals.
function empty_range(first, last, code, src) {
    for (code = first; code <= last; code++) {
        if (!(code in CH_COPY)) {
            continue;
        }
        src = CH_COPY[code];
        if (src in CHAR) {
            # insert a character: new header lines followed by the stored body
            print "STARTCHAR", "char" code;
            print "ENCODING", code;
            printf("%s", CHAR[src]);
            printf("%.40s: copied %04X to %04X\n", FILENAME, CHAR_REF[src], code) >> "/dev/stderr";
        } else if (src > 0 && src != code) {
            printf("%.40s: need %04X for %04X\n", FILENAME, src, code) >> "/dev/stderr";
        }
    }
}
|
|
|
|
|
|
|
|
|
|
# Start of a glyph: hold the STARTCHAR line back (the ENCODING rule prints it)
# and begin collecting a fresh glyph body.
$1 == "STARTCHAR" {
    startchar_line = $0;    # delay output until ENCODING
    body = "";
    next;                   # skip the catch-all print for this line
}
|
|
|
|
|
|
|
|
|
|
# Metric lines (SWIDTH, DWIDTH, BBX) are appended to the current glyph body.
$1 ~ /^(SWIDTH|DWIDTH|BBX)$/ {
    body = body $0 "\n";
}
|
|
|
|
|
|
|
|
|
|
# Range rule: every line from BITMAP through ENDCHAR (both inclusive) is
# appended to the current glyph body.
$1 == "BITMAP", $1 == "ENDCHAR" {
    body = body $0 "\n";
}
|
|
|
|
|
|
|
|
|
|
# On ENCODING: first handle the code points skipped between the previous
# encoding and this one, then release the STARTCHAR line that was held back
# and record this encoding as the current one.
$1 == "ENCODING" {
    empty_range(enc + 1, $2 - 1);

    print startchar_line;   # delayed until here
    enc = $2;
}
|
|
|
|
|
|
|
|
|
|
# Store the current glyph body under the code that table CH_X maps the current
# encoding `enc` to, unless `enc` has no entry in CH_X or a body is already
# stored for that code. CHAR_REF records which encoding supplied the body.
function add_char(CH_X) {
    if (!(enc in CH_X)) {
        return;
    }
    if (CH_X[enc] in CHAR) {
        return;
    }
    CHAR[CH_X[enc]] = body;
    CHAR_REF[CH_X[enc]] = enc;
}
|
|
|
|
|
|
|
|
|
|
# End of a glyph: store its body as a reference where none exists yet, then,
# if this encoding belongs to an equivalence group, report on stderr when its
# body differs from the body stored for the group's canonical code.
$1 == "ENDCHAR" {
    add_char(CH_EQU);
    add_char(CH_COPY);

    if (enc in CH_EQU) {
        orig = CH_EQU[enc];
        if (CHAR[orig] != body) {
            printf("%.40s: %04X /= %04X\n", FILENAME, enc, CHAR_REF[orig]) >> "/dev/stderr";
        }
    }
}
|
|
|
|
|
|
|
|
|
|
# At ENDFONT, handle every remaining code point after the last encoding seen,
# up to and including 1114111 (hexadecimal 10FFFF).
$1 == "ENDFONT" {
    empty_range(enc + 1, 1114111);
}
|
|
|
|
|
|
|
|
|
|
# Catch-all: echo the current input line. Rules that end with 'next'
# prevent this from running for their line.
{
    print $0;
}
|