#!/usr/bin/awk -f
# script makes and checks graphically equivalent characters
# (like capital latin a, greek alpha, and cyrillic a)
#
# Input is read record by record and recognised by its first field
# (STARTFONT, SLANT, STARTCHAR, ENCODING, SWIDTH, DWIDTH, BBX, BITMAP,
# ENDCHAR, ENDFONT -- i.e. what looks like a BDF font).  The font is echoed
# to stdout; characters that are missing but have a known equivalent are
# inserted as copies, and every copy / mismatch / missing original is
# reported on stderr.
#
# Global state:
#   CH_EQU[code]   canonical code point of the equivalence class of `code`
#   CH_COPY[code]  same mapping, used when inserting missing characters
#   CH_MAX         largest code point present in CH_EQU / CH_COPY
#   CHAR[canon]    glyph body (SWIDTH..ENDCHAR lines) first seen for a class
#   CHAR_REF[canon] encoding of the character that CHAR[canon] came from
#   enc            encoding of the character currently being read
#   slant          value of the font's SLANT record, quotes included

# Abort with exit status 1 and a message on stderr when `expression` is false.
function assert(expression, message) {
    if (!expression) {
        print "DEBUG ASSERTION FAILED "message > "/dev/stderr";
        exit 1;
    }
}

# Convert the hexadecimal string `x` (either case) to a number.
# Asserts that every character is a hexadecimal digit.
function hex(x,    i, y, d) {
    y = 0;
    for (i = 1; i <= length(x); i++) {
        # index() is 1-based, so a digit's value is its position minus one
        d = index("0123456789ABCDEF", toupper(substr(x, i, 1)));
        assert(d > 0 && d <= 16, "Hexadecimal character required");
        y = y * 16 + d - 1;
    }
    return y;
}

# Convert the non-negative integer `x` to an upper-case hexadecimal string,
# left-padded with zeros to at least `p` digits (p defaults to 1).
function to_hex(x, p,    s, j) {
    if (p <= 0) {
        p = 1;
    }
    s = "";
    # Was "x /= 0", which in awk is divide-and-assign (division by zero),
    # not an inequality test.
    for (j = 0; j < p || x != 0; j = j + 1) {
        # substr() is 1-based: digit value 0 lives at position 1
        s = substr("0123456789ABCDEF", x % 16 + 1, 1) s;
        # integer division, otherwise x never reaches exactly 0
        x = int(x / 16);
    }
    return s;
}

# Register the space-separated hexadecimal code points in `s` as one class
# of graphically equivalent characters.  The canonical member is the first
# code point, or the canonical member of the class it already belongs to.
function make_eq(s,    A, oc, j, n) {
    # use split()'s return value; length(array) is not portable to all awks
    n = split(s, A, " ");
    if (n == 0) {
        return;
    }
    for (j = 1; j <= n; j = j + 1) {
        A[j] = hex(A[j]);
    }
    if (A[1] in CH_EQU) {
        oc = CH_EQU[A[1]];
    } else {
        oc = A[1];
    }
    for (j = 1; j <= n; j = j + 1) {
        CH_COPY[A[j]] = oc;
        CH_EQU[A[j]] = oc;
        if (A[j] > CH_MAX) {
            CH_MAX = A[j];
        }
    }
}

# Like make_eq(), but only when the current font's SLANT is "R".
function make_eq_roman(s) {
    if (slant == "\"R\"") {
        make_eq(s);
    }
}

# Like make_eq(), but only when the current font's SLANT is "I".
function make_eq_italic(s) {
    if (slant == "\"I\"") {
        make_eq(s);
    }
}

# (Re)build CH_EQU / CH_COPY from scratch for the current value of `slant`.
# The roman/italic entries depend on `slant`, which is only known once the
# font's SLANT record has been read, so this is called again from that rule.
function init_equivalences() {
    split("", CH_EQU);
    split("", CH_COPY);
    CH_MAX = 0;

    make_eq("0020 00A0"); # space
    make_eq("0041 0391 0410"); make_eq("0061 0430"); # A a
    make_eq("0042 0392 0412"); # B
    make_eq("0043 03F9 0421"); make_eq("0063 03F2 0441 1D04"); # C c
    make_eq("0045 0395 0415"); make_eq("0065 0435"); # E e
    make_eq("0046 03DC"); # F
    make_eq("0048 0397 041D"); make_eq("0068 04BB"); # H h
    make_eq("0049 0399 0406"); make_eq("0069 0456"); # I i
    make_eq("004A 037F 0408"); make_eq("006A 03F3 0458"); # J j
    make_eq("004B 039A 041A 212A"); # K
    make_eq("004D 039C 041C"); # M
    make_eq("004E 039D"); # N
    make_eq("004F 039F 041E"); make_eq("006F 03BF 043E 1D0F"); # O o
    make_eq("0050 03A1 0420"); make_eq("0070 0440"); # P p
    make_eq("0051 051A"); make_eq("0071 051B"); # Q q
    make_eq("0053 0405"); make_eq("0073 0455"); # S s
    make_eq("0054 03A4 0422"); # T
    make_eq("0076 1D20"); # v
    make_eq("0057 051C"); make_eq("0077 051D 1D21"); # W w
    make_eq("0058 03A7 0425"); make_eq("0078 0445"); # X x
    make_eq("0059 04AE"); make_eq("0079 0443"); # Y y
    make_eq("005A 0396"); make_eq("007A 1D22"); # Z z
    make_eq("00B5 03BC"); # μ (micro)
    make_eq("00C4 04D2"); make_eq("00E4 04D3"); # Ä ä
    make_eq("00C5 212B"); # Å (angstrom)
    make_eq("00C6 04D4"); make_eq("00E6 04D5"); # Æ æ
    make_eq("00C8 0400"); make_eq("00E8 0450"); # È è
    make_eq("00CB 0401"); make_eq("00EB 0451"); # Ë ë
    make_eq("00CF 0407"); make_eq("00EF 0457"); # Ï ï
    make_eq("00D0 0110"); # Ð (eth)
    make_eq("00D6 04E6"); make_eq("00F6 04E7"); # Ö ö
    make_eq("00DE 03F7"); make_eq("00FE 03F8"); # Þ þ
    make_eq("00FF 04F1"); # ÿ
    make_eq("0102 04D0"); make_eq("0103 04D1"); # Ă ă
    make_eq("0114 04D6"); make_eq("0115 04D7"); # Ĕ ĕ
    make_eq("0138 03BA 043A 1D0B"); # ĸ
    make_eq("0186 03FD"); # Ɔ
    make_eq("018F 04D8"); # Ə
    make_eq("0190 0510"); # Ɛ
    make_eq("01B1 2127"); # Ʊ
    make_eq("01DD 0259 04D9"); # ǝ
    make_eq("0233 04EF"); # ȳ
    make_eq("0251 03B1 237A"); # ɑ
    make_eq("0254 037B 1D10"); # ɔ
    make_eq("025B 03B5 0511"); # ɛ
    make_eq("025C 0437"); # ɜ
    make_eq("0269 03B9 2373 A647"); # ɩ
    make_eq("0275 04E9"); # ɵ
    make_eq("0299 0432"); # ʙ
    make_eq("029C 043D"); # ʜ
    make_eq("0376 0418"); make_eq_roman("0377 0438"); make_eq_italic("0075 0438"); # Ͷ ͷ
    make_eq("0393 0413"); # Γ
    make_eq("0394 2206"); # Δ
    make_eq("03A0 041F"); # Π
    make_eq("03A6 0424"); # Φ
    make_eq("03A9 2126"); # Ω
    make_eq("03C9 0461 2375"); # ω
    make_eq("040C 1E30"); # Ќ
    make_eq_roman("0433 1D26"); # г
    make_eq_roman("0438 1D0E"); # и
    make_eq("043B 1D2B"); # л
    make_eq("043C 1D0D"); # м
    make_eq("043F 1D28"); # п
    make_eq("1D18 1D29"); # р
    make_eq_roman("0442 1D1B"); # т
    make_eq("044F 1D19"); # я
}

BEGIN {
    # Builds the slant-independent table (plus the slant-dependent entries
    # if `slant` was preset on the command line with -v).
    init_equivalences();
}

# A new font starts: forget every glyph remembered from the previous one.
$1=="STARTFONT" {
    enc = -1;
    split("", CHAR);
    split("", CHAR_REF);
}

# The slant is only known here, after BEGIN has already run, so the
# roman/italic equivalences must be re-evaluated when it changes.
$1=="SLANT" {
    if ($2 != slant) {
        slant = $2;
        init_equivalences();
    }
}

# For every code point in first..last that has a registered equivalent:
# emit a copy of the equivalent's glyph if one has been seen, otherwise
# report on stderr which original would be needed.
function empty_range(first, last,    j, c) {
    # Nothing above CH_MAX is in CH_COPY, so there is no point scanning
    # further (matters for the ENDFONT call, which passes 0x10FFFF).
    if (last > CH_MAX) {
        last = CH_MAX;
    }
    for (j = first; j <= last; j = j + 1) {
        if (j in CH_COPY) {
            # insert a character
            c = CH_COPY[j];
            if (c in CHAR) {
                print "STARTCHAR", "char" j;
                print "ENCODING", j;
                printf("%s", CHAR[c]);
                printf("%.40s: copied %04X to %04X\n", FILENAME, CHAR_REF[c], j) >> "/dev/stderr";
            } else if (c > 0 && c != j) {
                printf("%.40s: need %04X for %04X\n", FILENAME, c, j) >> "/dev/stderr";
            }
        }
    }
}

$1=="STARTCHAR" {
    body = "";
    startchar_line = $0; # delay output until ENCODING
    next;
}

# Collect the lines that make up the glyph body.
$1=="SWIDTH" || $1=="DWIDTH" || $1=="BBX" {
    body = body $0 "\n";
}

$1=="BITMAP", $1=="ENDCHAR" {
    body = body $0 "\n";
}

# Fill the gap between the previous encoding and this one before the
# pending STARTCHAR line is written out.
$1=="ENCODING" {
    empty_range(enc + 1, $2 - 1);
    enc = $2;
    print startchar_line; # delayed until here
}

# Remember the current glyph body as the reference glyph of its class in
# table CH_X, unless the class already has one.
function add_char(CH_X) {
    if (enc in CH_X && !(CH_X[enc] in CHAR)) {
        CHAR[CH_X[enc]] = body;
        CHAR_REF[CH_X[enc]] = enc;
    }
}

# A glyph is complete: record it and compare it with its class reference.
$1=="ENDCHAR" {
    add_char(CH_EQU);
    add_char(CH_COPY);
    if (enc in CH_EQU) {
        orig = CH_EQU[enc];
        if (body != CHAR[orig]) {
            printf("%.40s: %04X /= %04X\n", FILENAME, enc, CHAR_REF[orig]) >> "/dev/stderr";
        } else {
            #print enc " == " CHAR_REF[orig] >> "/dev/stderr"
        }
    }
}

# Handle equivalents that lie above the last encoding present in the font.
$1=="ENDFONT" {
    empty_range(enc + 1, hex("10FFFF"));
}

{
    # this can be skipped with 'next'
    print;
}