keyboard-genetics/src/data.js

78 lines
2.0 KiB
JavaScript
Raw Normal View History

2016-02-25 01:12:42 +01:00
/**
* Imports data and builds the library
*/
const fs = require("fs");
2016-03-28 02:36:51 +02:00
const { execSync } = require("child_process");
2016-02-25 01:12:42 +01:00
2016-03-28 02:36:51 +02:00
// Shell command that concatenates every Markdown file under the current directory.
// The glob is quoted so the shell hands it to `find` verbatim instead of expanding
// it against the working directory, and `-exec cat {} +` replaces `| xargs cat`
// so file names containing whitespace are passed through intact.
const DOCS_CMD = "find . -type f -name '*.md' -exec cat {} +";
2016-03-29 12:58:31 +02:00
// const docs = exports.docs = execSync(DOCS_CMD, {timeout: 0, maxBuffer: 1024 * 1024 * 1024}).toString().replace(/\n```[\s\S]+?\n```\n/g, "");
2016-10-24 11:08:27 +02:00
// Concatenated contents of every *.txt file found directly inside the "text"
// directory (resolved against the process working directory), with a blank
// line between consecutive files. Also published as `exports.text`.
const text = fs.readdirSync("text")
  .filter((name) => /\.txt$/.test(name))
  .map((name) => fs.readFileSync(`text/${name}`, "utf8"))
  .join("\n\n");
exports.text = text;
2016-03-01 11:18:50 +01:00
2016-03-28 02:36:51 +02:00
// Shell command that concatenates every JavaScript file under the current directory.
// The glob is quoted so the shell hands it to `find` verbatim instead of expanding
// it against the working directory, and `-exec cat {} +` replaces `| xargs cat`
// so file names containing whitespace are passed through intact.
const CODE_CMD = "find . -type f -name '*.js' -exec cat {} +";
2016-06-26 09:32:00 +02:00
// Captured stdout of CODE_CMD as a single string; also published as `exports.code`.
// The command runs synchronously at module load.
const code = execSync(CODE_CMD, {
  timeout: 0,
  maxBuffer: 1024 * 1024 * 1024, // 1 GiB
}).toString();
exports.code = code;
2016-02-25 01:12:42 +01:00
2022-08-18 20:53:45 +02:00
// FIXME (no source, incorrect weights)
// Weighted trigram table fed to generate_text_from().
// Keys are Latin-key sequences; the trailing comment on each entry shows the
// Cyrillic trigram it stands for. Values are relative weights.
// NOTE(review): `tdf` and `jdf` carry the same weight — looks like a placeholder; confirm.
const POPULAR_TRIGRAMS = {
  cnd: 80000000, // ств
  cnj: 59623899, // сто
  tyj: 27088636, // ено
  yjd: 19494469, // нов
  njd: 13977786, // тов
  jdj: 11059185, // ово
  tdf: 10141992, // ева
  jdf: 10141992, // ова
};
2022-08-18 20:53:45 +02:00
// FIXME (no source, incorrect weights)
// Weighted bigram table fed to generate_text_from().
// Keys are Latin-key sequences; the trailing comment on each entry shows the
// Cyrillic bigram it stands for. Values are relative weights.
const POPULAR_BIGRAMS = {
  cn: 92535489, // ст
  yj: 87741289, // но
  ty: 54433847, // ен
  nj: 51910883, // то
  yf: 51015163, // на
  jd: 41694599, // ов
  yb: 37466077, // ни
  hf: 33802063, // ра
  dj: 32967758, // во
  rj: 31830493, // ко
};
// Texts generated from the weighted n-gram tables, also published on `exports`.
// generate_text_from is a hoisted function declaration, so calling it here,
// ahead of its definition in the file, is fine.
const trigrams = generate_text_from(POPULAR_TRIGRAMS);
const bigrams = generate_text_from(POPULAR_BIGRAMS);
exports.trigrams = trigrams;
exports.bigrams = bigrams;
/**
 * Builds a space-separated text from a weighted dictionary of tokens.
 *
 * Each key is emitted `percent` times in its original form and `percent`
 * times with its first character upper-cased, where `percent` is the key's
 * share of the total weight rounded to a whole percentage.
 *
 * Keys whose share rounds to 0 (or is NaN because the total weight is 0)
 * are skipped, so they no longer leave empty tokens — and therefore stray
 * double or trailing spaces — in the result.
 *
 * @param {Object<string, number>} dictionary - token -> relative weight
 * @returns {string} generated text; "" when nothing qualifies
 */
function generate_text_from(dictionary) {
  const keys = Object.keys(dictionary);
  const total = keys.reduce((sum, key) => sum + dictionary[key], 0);

  const tokens = [];
  for (const key of keys) {
    const percent = Math.round(dictionary[key] / total * 100);
    // `!(percent > 0)` also rejects NaN, which plain `percent <= 0` would not.
    if (!(percent > 0)) {
      continue;
    }
    tokens.push(repeat(key, percent));
    tokens.push(repeat(titleize(key), percent));
  }
  return tokens.join(" ");
}
/**
 * Returns `string` repeated `times` times, with single spaces between copies.
 *
 * @param {string} string - the piece to repeat
 * @param {number} times - how many copies to produce; values <= 0 yield ""
 * @returns {string} the space-joined copies
 */
function repeat(string, times) {
  const copies = [];
  while (copies.length < times) {
    copies.push(string);
  }
  return copies.join(" ");
}
/**
 * Upper-cases the first character of `str` and leaves the rest untouched.
 *
 * Uses `charAt(0)` so an empty string yields "" instead of throwing, and
 * `slice` in place of the deprecated `substr`.
 *
 * @param {string} str - text to capitalize
 * @returns {string} `str` with its first character upper-cased
 */
function titleize(str) {
  return str.charAt(0).toUpperCase() + str.slice(1);
}