Salut_fonts/tools/make-equivalents.awk

204 lines
5.0 KiB
Awk
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/awk -f
# Creates and checks graphically equivalent characters (e.g. Latin capital A, Greek capital Alpha, and Cyrillic capital A).
# Abort the script when a sanity check fails.
#   expression - condition that is expected to be true
#   message    - text appended to the diagnostic written to /dev/stderr
# When the condition holds nothing happens; otherwise the diagnostic is
# printed and the script exits with status 1.
function assert(expression, message) {
    if (expression) {
        return;
    }
    print "DEBUG ASSERTION FAILED "message > "/dev/stderr";
    exit 1;
}
# Convert a string of hexadecimal digits (either case) to a number.
#   x - the digit string
# The names after the gap in the parameter list are locals.
# Any character that is not a hexadecimal digit trips assert().
function hex(x,    pos, value, digit, len) {
    value = 0;
    len = length(x);
    pos = 1;
    while (pos <= len) {
        # index() is 1-based, so digit runs from 1 ("0") to 16 ("F"); 0 means "not found".
        digit = index("0123456789ABCDEF", toupper(substr(x, pos, 1)));
        assert(digit >= 1 && digit <= 16, "Hexadecimal character required");
        value = value * 16 + (digit - 1);
        pos = pos + 1;
    }
    return value;
}
# Format a non-negative number as an upper-case hexadecimal string.
#   x - the value to format (any fractional part is discarded)
#   p - minimum number of digits; values <= 0 (or omitted) mean 1
# Returns the digit string, left-padded with "0" up to p digits.
# Negative x is not supported.
# The names after the gap in the parameter list are locals.
function to_hex(x, p,    s, j) {
    if (p <= 0) {
        p = 1;
    }
    s = "";
    x = int(x);
    # Keep emitting digits until both the requested width is reached and
    # no significant digits remain.
    for (j = 0; j < p || x != 0; j = j + 1) {
        # substr() is 1-based, hence the + 1 on the digit value.
        s = substr("0123456789ABCDEF", x % 16 + 1, 1) s;
        # awk division is floating point; truncate to step to the next digit.
        x = int(x / 16);
    }
    return s;
}
# Register one group of look-alike code points.
#   s - space-separated hexadecimal code points; the first one names the group
# Every member of the group is mapped, in both the global CH_COPY and the
# global CH_EQU tables, to a single canonical code point: the value CH_EQU
# already holds for the first member if there is one, otherwise the first
# member itself.
# The names after the gap in the parameter list are locals.
function make_eq (s,    A, oc, j, n) {
    # split() returns the element count; this avoids length(array), which
    # not every awk implementation supports.
    n = split(s, A, " ");
    if (n == 0) {
        # Nothing to register; also avoids creating an empty A[1] below.
        return;
    }
    for (j = 1; j <= n; j = j + 1) {
        A[j] = hex(A[j]);
    }
    if (A[1] in CH_EQU) {
        # The first member already belongs to a group: join that group.
        oc = CH_EQU[A[1]];
    } else {
        oc = A[1];
    }
    for (j = 1; j <= n; j = j + 1) {
        CH_COPY[A[j]] = oc;
        CH_EQU[A[j]] = oc;
    }
}
# Register the look-alike group s (see make_eq) only while the global
# `slant` holds the text "R" including its double quotes; otherwise do nothing.
function make_eq_roman (s) {
    if (slant != "\"R\"") {
        return;
    }
    make_eq(s);
}
# Register the look-alike group s (see make_eq) only while the global
# `slant` holds the text "I" including its double quotes; otherwise do nothing.
function make_eq_italic (s) {
    if (slant != "\"I\"") {
        return;
    }
    make_eq(s);
}
# Build the tables of graphically equivalent code points before any input
# is read.  Each make_eq() argument is one group of hexadecimal code points;
# the trailing comment shows the glyph the group stands for.
# The order of the calls matters: a group whose first member was already
# registered joins that earlier group (see make_eq).
# NOTE(review): BEGIN runs before the first input record, so the SLANT rule
# cannot have set `slant` yet.  The make_eq_roman()/make_eq_italic() rows
# only take effect if `slant` was assigned on the command line (e.g. with
# -v) -- confirm how the script is invoked.
BEGIN {
    make_eq("0020 00A0"); # space
    make_eq("0041 0391 0410"); make_eq("0061 0430"); # A a
    make_eq("0042 0392 0412"); # B
    make_eq("0043 03F9 0421"); make_eq("0063 03F2 0441 1D04"); # C c
    make_eq("0045 0395 0415"); make_eq("0065 0435"); # E e
    make_eq("0046 03DC"); # F
    make_eq("0048 0397 041D"); make_eq("0068 04BB"); # H h
    make_eq("0049 0399 0406"); make_eq("0069 0456"); # I i
    make_eq("004A 037F 0408"); make_eq("006A 03F3 0458"); # J j
    make_eq("004B 039A 041A 212A"); # K
    make_eq("004D 039C 041C"); # M
    make_eq("004E 039D"); # N
    make_eq("004F 039F 041E"); make_eq("006F 03BF 043E 1D0F"); # O o
    make_eq("0050 03A1 0420"); make_eq("0070 0440"); # P p
    # The two calls below used to be separated only by a space, which made
    # them a string concatenation instead of two statements.
    make_eq("0051 051A"); make_eq("0071 051B"); # Q q
    make_eq("0053 0405"); make_eq("0073 0455"); # S s
    make_eq("0054 03A4 0422"); # T
    make_eq("0076 1D20"); # v
    make_eq("0057 051C"); make_eq("0077 051D 1D21"); # W w
    make_eq("0058 03A7 0425"); make_eq("0078 0445"); # X x
    make_eq("0059 04AE"); make_eq("0079 0443"); # Y y
    make_eq("005A 0396"); make_eq("007A 1D22"); # Z z
    make_eq("00B5 03BC"); # μ (micro)
    make_eq("00C4 04D2"); make_eq("00E4 04D3"); # Ä ä
    make_eq("00C5 212B"); # Å (angstrom)
    make_eq("00C6 04D4"); make_eq("00E6 04D5"); # Æ æ
    make_eq("00C8 0400"); make_eq("00E8 0450"); # È è
    make_eq("00CB 0401"); make_eq("00EB 0451"); # Ë ë
    make_eq("00CF 0407"); make_eq("00EF 0457"); # Ï ï
    make_eq("00D0 0110"); # Ð (eth)
    make_eq("00D6 04E6"); make_eq("00F6 04E7"); # Ö ö
    make_eq("00DE 03F7"); make_eq("00FE 03F8"); # Þ þ
    make_eq("00FF 04F1"); # ÿ
    make_eq("0102 04D0"); make_eq("0103 04D1"); # Ă ă
    make_eq("0114 04D6"); make_eq("0115 04D7"); # Ĕ ĕ
    make_eq("0138 03BA 043A 1D0B"); # ĸ
    make_eq("0186 03FD"); # Ɔ
    make_eq("018F 04D8"); # Ə
    make_eq("0190 0510"); # Ɛ
    make_eq("01B1 2127"); # Ʊ
    make_eq("01DD 0259 04D9"); # ǝ
    make_eq("0233 04EF"); # ȳ
    make_eq("0251 03B1 237A"); # ɑ
    make_eq("0254 037B 1D10"); # ɔ
    make_eq("025B 03B5 0511"); # ɛ
    make_eq("025C 0437"); # ɜ
    make_eq("0269 03B9 2373 A647"); # ɩ
    make_eq("0275 04E9"); # ɵ
    make_eq("0299 0432"); # ʙ
    make_eq("029C 043D"); # ʜ
    make_eq("0376 0418"); make_eq_roman("0377 0438"); make_eq_italic("0075 0438"); # Ͷ ͷ
    make_eq("0393 0413"); # Γ
    make_eq("0394 2206"); # Δ
    make_eq("03A0 041F"); # Π
    make_eq("03A6 0424"); # Φ
    make_eq("03A9 2126"); # Ω
    make_eq("03C9 0461 2375"); # ω
    make_eq("040C 1E30"); # Ќ
    make_eq_roman("0433 1D26"); # г
    make_eq_roman("0438 1D0E"); # и
    make_eq("043B 1D2B"); # л
    make_eq("043C 1D0D"); # м
    make_eq("043F 1D28"); # п
    make_eq("1D18 1D29"); # р
    make_eq_roman("0442 1D1B"); # т
    make_eq("044F 1D19"); # я
}
# A new font begins: forget every glyph body remembered so far and reset
# the last seen encoding to "none yet" (-1).
$1 == "STARTFONT" {
    split("", CHAR_REF);    # split("", X) empties the array X
    split("", CHAR);
    enc = -1;
}

# Remember the second field of the SLANT line for make_eq_roman() and
# make_eq_italic().
$1 == "SLANT" {
    slant = $2;
}
# Fill a gap of code points that the font does not define.
#   first, last - inclusive bounds of the gap
# For every code point in the gap that has a CH_COPY entry:
#   - if a body was remembered for its canonical code point, emit a complete
#     glyph (STARTCHAR, ENCODING, stored body) on standard output and log
#     the copy on /dev/stderr;
#   - otherwise, when the canonical code point is positive and differs from
#     the gap code point, log on /dev/stderr that it is needed.
# The names after the gap in the parameter list are locals.
function empty_range(first, last,    cp, src) {
    for (cp = first; cp <= last; cp++) {
        if (!(cp in CH_COPY)) {
            continue;
        }
        src = CH_COPY[cp];
        if (src in CHAR) {
            # insert a character
            print "STARTCHAR", "char" cp;
            print "ENCODING", cp;
            printf("%s", CHAR[src]);
            printf("%.40s: copied %04X to %04X\n", FILENAME, CHAR_REF[src], cp) >> "/dev/stderr";
            continue;
        }
        if (src > 0 && src != cp) {
            printf("%.40s: need %04X for %04X\n", FILENAME, src, cp) >> "/dev/stderr";
        }
    }
}
# Start of a glyph: hold the STARTCHAR line back (it is printed by the
# ENCODING rule) and begin a fresh body.  `next` keeps the line away from
# the final print rule.
$1 == "STARTCHAR" {
    startchar_line = $0;
    body = "";
    next;
}

# Metric lines are part of the remembered glyph body.
$1 ~ /^(SWIDTH|DWIDTH|BBX)$/ {
    body = body $0 "\n";
}

# So is everything from the BITMAP line through the ENDCHAR line, inclusive.
$1 == "BITMAP", $1 == "ENDCHAR" {
    body = body $0 "\n";
}

# The encoding is known now: first fill the gap since the previous glyph,
# then release the STARTCHAR line that was held back.  The ENCODING line
# itself is printed by the final print rule.
$1 == "ENCODING" {
    empty_range(enc + 1, $2 - 1);
    enc = $2;
    print startchar_line;
}
# Remember the current glyph as the reference for its canonical code point.
#   CH_X - table mapping code points to canonical code points
# If the global `enc` has an entry in CH_X and no body has been stored yet
# for the code point it maps to, store the global `body` in CHAR and record
# `enc` in CHAR_REF under that code point.  Later glyphs never overwrite an
# existing entry.
# The name after the gap in the parameter list is a local.
function add_char(CH_X,    target) {
    if (!(enc in CH_X)) {
        return;
    }
    target = CH_X[enc];
    if (target in CHAR) {
        return;
    }
    CHAR[target] = body;
    CHAR_REF[target] = enc;
}
# End of a glyph: remember its body as a reference if none exists yet, then
# compare it with the reference of its look-alike group and report any
# difference on /dev/stderr.
$1 == "ENDCHAR" {
    add_char(CH_EQU);
    add_char(CH_COPY);
    if (enc in CH_EQU) {
        orig = CH_EQU[enc];
        if (body != CHAR[orig]) {
            printf("%.40s: %04X /= %04X\n", FILENAME, enc, CHAR_REF[orig]) >> "/dev/stderr";
        }
    }
}
# End of the font: fill the remaining gap from the last seen encoding up to
# code point 10FFFF (hexadecimal) before the ENDFONT line is printed.
$1 == "ENDFONT" {
    empty_range(enc + 1, hex("10FFFF"));
}

# Pass every line through unchanged, unless an earlier rule skipped it
# with `next`.
{
    print $0;
}