+ script to check accented letters
This commit is contained in:
parent
fec69996c6
commit
670fcfe5e5
|
@ -0,0 +1,64 @@
|
|||
#!/bin/awk -f
|
||||
|
||||
# This script outputs a table of accented Latin letters
|
||||
# in the form of a text file
|
||||
# Each column corresponds to a base letter
|
||||
# Each row corresponds to an accent or a combination of accents
|
||||
|
||||
# Pass UnicodeData.txt to this script as its input
|
||||
|
||||
# hex(x): convert a string of hexadecimal digits (either case) to a number.
#   x        - digit string, e.g. the code point field "00E9"
#   i, y, d  - local scratch variables; callers pass only x
# Returns the numeric value (0 for an empty string), or -1 after writing a
# diagnostic to /dev/stderr when x contains a non-hexadecimal character.
function hex(x, i, y, d) {
    y = 0;
    for (i = 1; i <= length(x); i++) {
        # index() yields the 1-based position of the digit, or 0 if absent.
        d = index("0123456789ABCDEF", toupper(substr(x, i, 1)));
        if (d == 0) {
            print "Hexadecimal character required" > "/dev/stderr";
            # Stop here: folding d - 1 == -1 into y would return garbage.
            return -1;
        }
        y = y * 16 + d - 1;
    }
    return y;
}
|
||||
|
||||
# Split every input record on semicolons, so that $1 and $2 are the first
# two semicolon-delimited fields of each UnicodeData.txt line.
BEGIN { FS = ";" }
|
||||
|
||||
# Accented letters: $2 has the form "LATIN <letter> WITH <accent>".
# The three-argument match() stores the two groups in A[1] (base letter
# name) and A[2] (accent name). Each distinct letter name gets its own
# column number and each distinct accent name its own row number, both
# allocated in order of first appearance starting from 1.
match($2, "^LATIN (.*) WITH (.*)", A) {
    # First sighting of this base letter: allocate the next column.
    if (!(A[1] in LETTERS))
        LETTERS[A[1]] = ++n_letters;

    # First sighting of this accent: allocate the next row.
    if (!(A[2] in ACCENTS))
        ACCENTS[A[2]] = ++n_accents;

    x = LETTERS[A[1]];
    y = ACCENTS[A[2]];

    # Store the character whose code point is given in hex by $1.
    T[y, x] = sprintf("%c", hex($1));

    # Skip the remaining rules for this record.
    next;
}
|
||||
|
||||
# Any other "LATIN <name>" record (accented ones never reach this rule,
# because the preceding rule ends with next): remember the character for
# <name>, keyed by that name, so END can look it up per base letter.
match($2, "^LATIN (.*)", A) { BARE[A[1]] = sprintf("%c", hex($1)); }
|
||||
|
||||
# Print the summary counts followed by the table: one text line per row
# (row 0 first, then one row per accent), one character per column (base
# letter), with a space wherever no character was recorded.
END {
    # "+ 0" forces a numeric value, so the counters print as 0 rather than
    # as an empty string when no input record matched.
    print "Letters: " (n_letters + 0);
    print "Accents: " (n_accents + 0);

    # Row 0 holds the unaccented character of each base letter, when one
    # was seen. BARE[t] is already a formatted character, so it is copied
    # as is instead of being passed through sprintf("%c") again.
    for (t in LETTERS) {
        if (t in BARE) {
            T[0, LETTERS[t]] = BARE[t];
        }
    }

    print "";
    for (y = 0; y <= n_accents; y++) {
        for (x = 1; x <= n_letters; x++) {
            if ((y, x) in T) {
                printf("%c", T[y, x]);
            } else {
                printf(" ");
            }
        }
        # Terminate the row.
        print "";
    }
}
|
Loading…
Reference in New Issue