keyboard-genetics/src/data.js

78 lines
2.0 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Imports data and builds the library
*/
const fs = require("fs");
const { execSync } = require("child_process");
// Shell commands that concatenate every matching file below the current
// working directory. The glob is single-quoted so the shell hands it to
// `find` verbatim instead of expanding it against files in the cwd, and
// `-exec cat {} +` replaces `| xargs cat $1` so paths containing whitespace
// are passed through intact (`$1` expanded to nothing there).
const DOCS_CMD = "find . -type f -name '*.md' -exec cat {} +";
// Markdown corpus with fenced code blocks stripped out; currently disabled.
// const docs = exports.docs = execSync(DOCS_CMD, {timeout: 0, maxBuffer: 1024 * 1024 * 1024}).toString().replace(/\n```[\s\S]+?\n```\n/g, "");

// Plain-text corpus: the contents of every `*.txt` file directly inside the
// `text` directory (resolved against the cwd), separated by blank lines.
const text = exports.text = fs.readdirSync("text")
  .filter(filename => /\.txt$/.test(filename))
  .map(filename => fs.readFileSync(`text/${filename}`, "utf8"))
  .join("\n\n");

const CODE_CMD = "find . -type f -name '*.js' -exec cat {} +";
// Source-code corpus: all `*.js` files under the cwd, read synchronously at
// module load. No timeout is applied and maxBuffer is set to 1 GiB.
const code = exports.code = execSync(CODE_CMD, {timeout: 0, maxBuffer: 1024 * 1024 * 1024}).toString();
/**
 * Weighted three-letter sequences used to synthesise practice text.
 *
 * Each key is a sequence of Latin key names; the trailing comment shows the
 * Cyrillic trigram it stands for (presumably what those keys type on the
 * standard Russian layout - TODO confirm). Values are relative weights.
 *
 * FIXME: the weights have no cited source and are known to be incorrect.
 */
const POPULAR_TRIGRAMS = {
  cnd: 80000000, // ств
  cnj: 59623899, // сто
  tyj: 27088636, // ено
  yjd: 19494469, // нов
  njd: 13977786, // тов
  jdj: 11059185, // ово
  tdf: 10141992, // ева
  jdf: 10141992, // ова
};
/**
 * Weighted two-letter sequences used to synthesise practice text.
 *
 * Each key is a pair of Latin key names; the trailing comment shows the
 * Cyrillic bigram it stands for (presumably what those keys type on the
 * standard Russian layout - TODO confirm). Values are relative weights.
 *
 * FIXME: the weights have no cited source and are known to be incorrect.
 */
const POPULAR_BIGRAMS = {
  cn: 92535489, // ст
  yj: 87741289, // но
  ty: 54433847, // ен
  nj: 51910883, // то
  yf: 51015163, // на
  jd: 41694599, // ов
  yb: 37466077, // ни
  hf: 33802063, // ра
  dj: 32967758, // во
  rj: 31830493, // ко
};
// Synthetic corpora built from the weighted n-gram tables; each is kept as a
// local binding and also published on `exports`.
const trigrams = generate_text_from(POPULAR_TRIGRAMS);
exports.trigrams = trigrams;

const bigrams = generate_text_from(POPULAR_BIGRAMS);
exports.bigrams = bigrams;
/**
 * Builds a space-separated sample text from a table of weighted n-grams.
 *
 * Every key is emitted twice - once as written and once with its first
 * character upper-cased - and each form is repeated as many times as the
 * key's share of the total weight, expressed as a rounded percentage.
 *
 * @param {Object<string, number>} dictionary - maps each n-gram to its weight
 * @returns {string} the generated text; "" when nothing qualifies
 */
function generate_text_from(dictionary) {
  // Object.keys lists own enumerable properties only, so keys inherited
  // through the prototype chain can never leak into the text.
  const keys = Object.keys(dictionary);

  let total = 0;
  for (const key of keys) {
    total += dictionary[key];
  }

  const tokens = [];
  for (const key of keys) {
    const percent = Math.round(dictionary[key] / total * 100);
    // A share that rounds below 1 (or is NaN because the total is 0) would
    // contribute empty tokens and leave stray spaces in the joined output.
    if (!(percent >= 1)) {
      continue;
    }
    tokens.push(repeat(key, percent));
    tokens.push(repeat(titleize(key), percent));
  }
  return tokens.join(" ");
}
/**
 * Joins `times` copies of `string` with single spaces.
 *
 * @param {string} string - the piece to duplicate
 * @param {number} times - how many copies to produce; values at or below zero yield ""
 * @returns {string} the copies separated by one space each
 */
function repeat(string, times) {
  const copies = [];
  let remaining = times;
  while (remaining > 0) {
    copies.push(string);
    remaining -= 1;
  }
  return copies.join(" ");
}
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 *
 * @param {string} str - text to capitalise
 * @returns {string} `str` with its first character upper-cased; "" for ""
 */
function titleize(str) {
  // charAt(0) yields "" for an empty string (str[0] would be undefined and
  // throw on .toUpperCase()); slice replaces the deprecated substr.
  return str.charAt(0).toUpperCase() + str.slice(1);
}