Salut_fonts/tools/make-equivalents.awk

204 lines
5.0 KiB
Awk
Raw Permalink Normal View History

#!/usr/bin/awk -f
# This script creates and checks graphically equivalent characters (e.g. capital Latin A, Greek Alpha, and Cyrillic A).
# Debug assertion: when `expression` is false, report `message` on stderr
# and terminate the script with exit status 1. Does nothing otherwise.
function assert(expression, message) {
    if (expression) {
        return;
    }
    print "DEBUG ASSERTION FAILED " message > "/dev/stderr";
    exit 1;
}
# Converts the hexadecimal string `x` (upper or lower case digits) to a number.
# An empty string yields 0. Any non-hexadecimal character trips assert().
# `pos`, `value` and `digit` are locals.
function hex(x, pos, value, digit) {
    value = 0;
    pos = 0;
    while (++pos <= length(x)) {
        # index() is 1-based: "0" -> 1 ... "F" -> 16, and 0 means "not found".
        digit = index("0123456789ABCDEF", toupper(substr(x, pos, 1)));
        assert(digit > 0 && digit <= 16, "Hexadecimal character required");
        value = 16 * value + (digit - 1);
    }
    return value;
}
# Formats the non-negative integer `x` as an upper-case hexadecimal string.
#   x - value to format (any fractional part is discarded)
#   p - minimum number of digits; the result is zero-padded on the left
#       (values <= 0 or omitted are treated as 1)
# `s` and `j` are locals.
function to_hex(x, p, s, j) {
    if (p <= 0) {
        p = 1;
    }
    x = int(x);
    s = "";
    # Loop until at least p digits were produced and nothing is left of x.
    # ("!=" is the inequality test; "/=" would be AWK's divide-assign.)
    for (j = 0; j < p || x != 0; j = j + 1) {
        # substr() is 1-based, so digit value 0 lives at position 1.
        s = substr("0123456789ABCDEF", x % 16 + 1, 1) s;
        # AWK division is floating point; truncate to move to the next digit.
        x = int(x / 16);
    }
    return s;
}
# Registers a group of graphically equivalent characters.
#   s - space-separated list of hexadecimal code points
# The first code point of the list is the group's representative, unless it
# already has a representative in CH_EQU, in which case that one is reused.
# Every listed code point is then mapped to the representative in both
# CH_COPY and CH_EQU. An empty list registers nothing.
# `A`, `oc`, `j` and `n` are locals.
function make_eq(s, A, oc, j, n) {
    # Use the element count returned by split(): length(array) is not
    # available in every awk implementation.
    n = split(s, A, " ");
    if (n == 0) {
        return;
    }
    for (j = 1; j <= n; j = j + 1) {
        A[j] = hex(A[j]);
    }
    if (A[1] in CH_EQU) {
        oc = CH_EQU[A[1]];
    } else {
        oc = A[1];
    }
    for (j = 1; j <= n; j = j + 1) {
        CH_COPY[A[j]] = oc;
        CH_EQU[A[j]] = oc;
    }
}
# Registers the equivalence group `s` (see make_eq) only while the global
# `slant` holds the quoted value "R"; otherwise does nothing.
function make_eq_roman(s) {
    if (slant != "\"R\"") {
        return;
    }
    make_eq(s);
}
# Registers the equivalence group `s` (see make_eq) only while the global
# `slant` holds the quoted value "I"; otherwise does nothing.
function make_eq_italic(s) {
    if (slant != "\"I\"") {
        return;
    }
    make_eq(s);
}
# (Re)builds the equivalence tables CH_EQU and CH_COPY from scratch.
# Some groups depend on the global `slant` (make_eq_roman / make_eq_italic),
# so the tables must be rebuilt whenever `slant` changes: building them only
# in BEGIN would evaluate those groups before any SLANT line has been read.
function make_equivalents() {
    split("", CH_EQU);
    split("", CH_COPY);
    make_eq("0020 00A0"); # space
    make_eq("0041 0391 0410"); make_eq("0061 0430"); # A a
    make_eq("0042 0392 0412"); # B
    make_eq("0043 03F9 0421"); make_eq("0063 03F2 0441 1D04"); # C c
    make_eq("0045 0395 0415"); make_eq("0065 0435"); # E e
    make_eq("0046 03DC"); # F
    make_eq("0048 0397 041D"); make_eq("0068 04BB"); # H h
    make_eq("0049 0399 0406"); make_eq("0069 0456"); # I i
    make_eq("004A 037F 0408"); make_eq("006A 03F3 0458"); # J j
    make_eq("004B 039A 041A 212A"); # K
    make_eq("004D 039C 041C"); # M
    make_eq("004E 039D"); # N
    make_eq("004F 039F 041E"); make_eq("006F 03BF 043E 1D0F"); # O o
    make_eq("0050 03A1 0420"); make_eq("0070 0440"); # P p
    make_eq("0051 051A"); make_eq("0071 051B"); # Q q
    make_eq("0053 0405"); make_eq("0073 0455"); # S s
    make_eq("0054 03A4 0422"); # T
    make_eq("0076 1D20"); # v
    make_eq("0057 051C"); make_eq("0077 051D 1D21"); # W w
    make_eq("0058 03A7 0425"); make_eq("0078 0445"); # X x
    make_eq("0059 04AE"); make_eq("0079 0443"); # Y y
    make_eq("005A 0396"); make_eq("007A 1D22"); # Z z
    make_eq("00B5 03BC"); # μ (micro)
    make_eq("00C4 04D2"); make_eq("00E4 04D3"); # Ä ä
    make_eq("00C5 212B"); # Å (angstrom)
    make_eq("00C6 04D4"); make_eq("00E6 04D5"); # Æ æ
    make_eq("00C8 0400"); make_eq("00E8 0450"); # È è
    make_eq("00CB 0401"); make_eq("00EB 0451"); # Ë ë
    make_eq("00CF 0407"); make_eq("00EF 0457"); # Ï ï
    make_eq("00D0 0110"); # Ð (eth)
    make_eq("00D6 04E6"); make_eq("00F6 04E7"); # Ö ö
    make_eq("00DE 03F7"); make_eq("00FE 03F8"); # Þ þ
    make_eq("00FF 04F1"); # ÿ
    make_eq("0102 04D0"); make_eq("0103 04D1"); # Ă ă
    make_eq("0114 04D6"); make_eq("0115 04D7"); # Ĕ ĕ
    make_eq("0138 03BA 043A 1D0B"); # ĸ
    make_eq("0186 03FD"); # Ɔ
    make_eq("018F 04D8"); # Ə
    make_eq("0190 0510"); # Ɛ
    make_eq("01B1 2127"); # Ʊ
    make_eq("01DD 0259 04D9"); # ǝ
    make_eq("0233 04EF"); # ȳ
    make_eq("0251 03B1 237A"); # ɑ
    make_eq("0254 037B 1D10"); # ɔ
    make_eq("025B 03B5 0511"); # ɛ
    make_eq("025C 0437"); # ɜ
    make_eq("0269 03B9 2373 A647"); # ɩ
    make_eq("0275 04E9"); # ɵ
    make_eq("0299 0432"); # ʙ
    make_eq("029C 043D"); # ʜ
    make_eq("0376 0418"); make_eq_roman("0377 0438"); make_eq_italic("0075 0438"); # Ͷ ͷ
    make_eq("0393 0413"); # Γ
    make_eq("0394 2206"); # Δ
    make_eq("03A0 041F"); # Π
    make_eq("03A6 0424"); # Φ
    make_eq("03A9 2126"); # Ω
    make_eq("03C9 0461 2375"); # ω
    make_eq("040C 1E30"); # Ќ
    make_eq_roman("0433 1D26"); # г
    make_eq_roman("0438 1D0E"); # и
    make_eq("043B 1D2B"); # л
    make_eq("043C 1D0D"); # м
    make_eq("043F 1D28"); # п
    make_eq("1D18 1D29"); # р
    make_eq_roman("0442 1D1B"); # т
    make_eq("044F 1D19"); # я
}
BEGIN {
    # Initial tables. The slant-dependent groups take effect here only if
    # `slant` was preset on the command line (awk -v slant=...); otherwise
    # they are added when the font's SLANT line is read (see below).
    make_equivalents();
}
# A new font starts: forget the previous font's encoding position and glyphs.
$1=="STARTFONT" {
    enc = -1;
    split("", CHAR);
    split("", CHAR_REF);
}
# Remember the font's slant (kept verbatim, i.e. including the quotes that
# make_eq_roman / make_eq_italic compare against) and rebuild the tables so
# that the slant-dependent groups match this font.
$1=="SLANT" {
    slant = $2;
    make_equivalents();
}
# Handles the code points first..last (inclusive) for which CH_COPY has an
# entry. For each such code point j with source c = CH_COPY[j]:
#   - if glyph data for c is stored in CHAR, a new character is printed
#     (STARTCHAR, ENCODING and the stored body) and the copy is logged;
#   - otherwise, if c is positive and differs from j, the missing source
#     glyph is logged.
# Log messages go to stderr. `j` and `c` are locals.
function empty_range(first, last, j, c) {
    for (j = first; j <= last; j++) {
        if (!(j in CH_COPY)) {
            continue;
        }
        c = CH_COPY[j];
        if (c in CHAR) {
            # insert a character
            print "STARTCHAR", "char" j;
            print "ENCODING", j;
            printf("%s", CHAR[c]);
            printf("%.40s: copied %04X to %04X\n", FILENAME, CHAR_REF[c], j) >> "/dev/stderr";
            continue;
        }
        if (c > 0 && c != j) {
            printf("%.40s: need %04X for %04X\n", FILENAME, c, j) >> "/dev/stderr";
        }
    }
}
# A character starts: reset the collected body and hold the STARTCHAR line
# back. It is printed by the ENCODING rule, after any inserted characters.
$1=="STARTCHAR" {
    startchar_line = $0;
    body = "";
    next;
}
# Collect the metrics lines into the character body.
$1 ~ /^(SWIDTH|DWIDTH|BBX)$/ {
    body = body $0 "\n";
}
# Collect everything from BITMAP through ENDCHAR (inclusive) as well.
$1=="BITMAP", $1=="ENDCHAR" {
    body = body $0 "\n";
}
# The encoding is known: first handle the code points skipped since the
# previous character, then emit the delayed STARTCHAR line.
$1=="ENCODING" {
    empty_range(enc + 1, $2 - 1);
    enc = $2;
    print startchar_line;
}
# Stores the current character (globals `enc` and `body`) as the glyph for
# the code point CH_X maps `enc` to. Nothing happens when `enc` has no entry
# in CH_X, or when a glyph is already stored for the mapped code point.
# `target` is a local.
function add_char(CH_X, target) {
    if (!(enc in CH_X)) {
        return;
    }
    target = CH_X[enc];
    if (target in CHAR) {
        return;
    }
    CHAR[target] = body;
    CHAR_REF[target] = enc;
}
# A character is complete: remember its body as a possible reference or copy
# source, then, if it belongs to an equivalence group, compare it with the
# group's stored glyph and report a mismatch on stderr.
$1=="ENDCHAR" {
    add_char(CH_EQU);
    add_char(CH_COPY);
    if (enc in CH_EQU) {
        orig = CH_EQU[enc];
        if (body != CHAR[orig]) {
            printf("%.40s: %04X /= %04X\n", FILENAME, enc, CHAR_REF[orig]) >> "/dev/stderr";
        }
        # Debug aid for the matching case:
        #print enc " == " CHAR_REF[orig] >> "/dev/stderr"
    }
}
# End of the font: handle every remaining code point above the last
# encoding seen, up to U+10FFFF, before the ENDFONT line itself is printed.
$1=="ENDFONT" {
    empty_range(enc + 1, hex("10FFFF"));
}
# Default rule: copy the input line to the output.
# Earlier rules can skip this with 'next'.
{
    print $0;
}