64 lines
1.2 KiB
Awk
64 lines
1.2 KiB
Awk
#!/bin/awk -f
|
|
|
|
# This script outputs a table of accented Latin letters
# in the form of a text file.
# Each column corresponds to a base letter.
# Each row corresponds to an accent or accent combination.

# Feed UnicodeData.txt to this script as input.
|
|
|
|
# hex(x) - convert a string of hexadecimal digits to its numeric value.
#
#   x        string of hexadecimal digits, upper or lower case;
#            the empty string yields 0
#   i, y, d  local scratch variables (awk idiom); callers pass only x
#
# Returns the numeric value of x. If x contains a character that is not a
# hexadecimal digit, a message is written to /dev/stderr and -1 is returned.
function hex(x, i, y, d) {
    y = 0;
    for (i = 1; i <= length(x); i++) {
        # index() yields the 1-based position of the digit, or 0 if absent,
        # so the digit's value is d - 1.
        d = index("0123456789ABCDEF", toupper(substr(x, i, 1)));
        if (d == 0) {
            print "Hexadecimal character required" > "/dev/stderr";
            # Stop here: folding d - 1 == -1 into y would silently
            # produce a bogus value.
            return -1;
        }
        y = y * 16 + d - 1;
    }
    return y;
}
|
|
|
|
# Split every input record into fields on semicolons.
BEGIN { FS = ";" }
|
|
|
|
# Accented letters: field 2 has the form "LATIN <letter> WITH <accent(s)>".
# Uses gawk's three-argument match(): A[1] is the base letter name and
# A[2] the accent (or accent combination).
#
# Each base letter seen for the first time gets the next column number,
# each accent seen for the first time gets the next row number, and the
# character for the code point in field 1 is stored at that table cell.
match($2, "^LATIN (.*) WITH (.*)", A) {
    if (!(A[1] in LETTERS))
        LETTERS[A[1]] = ++n_letters;
    if (!(A[2] in ACCENTS))
        ACCENTS[A[2]] = ++n_accents;

    x = LETTERS[A[1]];
    y = ACCENTS[A[2]];
    T[y, x] = sprintf("%c", hex($1));

    # Done with this record; do not let later rules process it.
    next;
}
|
|
|
|
# Any other record whose name starts with "LATIN ": remember the character
# for the code point in field 1, keyed by the rest of the name (A[1]).
match($2, "^LATIN (.*)", A) {
    bare_char = sprintf("%c", hex($1));
    BARE[A[1]] = bare_char;
}
|
|
|
|
# Print the summary counts, then the table: one output line per row
# (row 0 first, then rows 1..n_accents), one cell per column
# (1..n_letters). Cells with no stored character are printed as a space.
END {
    # "+ 0" forces a number so that an input with no matching records
    # reports 0 instead of an empty string.
    print "Letters: " (n_letters + 0);
    print "Accents: " (n_accents + 0);

    # Row 0 holds the unaccented form of each base letter that has one.
    # BARE[] already holds the converted character, so copy it as is.
    for (t in LETTERS) {
        if (t in BARE) {
            T[0, LETTERS[t]] = BARE[t];
        }
    }

    print "";
    for (y = 0; y <= n_accents; y++) {
        for (x = 1; x <= n_letters; x++) {
            if ((y, x) in T) {
                # Cells are ready-to-print strings, hence %s rather than %c.
                printf("%s", T[y, x]);
            } else {
                printf(" ");
            }
        }
        print "";
    }
}