/**
 * Imports data and builds the library.
 *
 * Exports four strings:
 *   - `text`:     contents of every `text/*.txt` file, separated by blank lines
 *   - `code`:     concatenated contents of every `*.js` file found under the
 *                 current working directory
 *   - `trigrams`: practice text generated from POPULAR_TRIGRAMS
 *   - `bigrams`:  practice text generated from POPULAR_BIGRAMS
 *
 * All paths are relative, so results depend on the directory the process is
 * started from. Everything is computed synchronously at load time.
 */
const fs = require("fs");
const { execSync } = require("child_process");

// Options shared by the shell invocations: no timeout and a 1 GiB output cap.
const EXEC_OPTIONS = { timeout: 0, maxBuffer: 1024 * 1024 * 1024 };

// The glob is quoted so the shell hands it to `find` untouched; unquoted, it
// is expanded against the working directory first, which either matches the
// wrong files or makes `find` fail. `-exec cat {} +` replaces `| xargs cat`
// so file names containing whitespace are passed through intact.
const DOCS_CMD = "find . -type f -name '*.md' -exec cat {} +";
// Disabled: would export the concatenated Markdown with fenced code blocks removed.
// const docs = exports.docs = execSync(DOCS_CMD, {timeout: 0, maxBuffer: 1024 * 1024 * 1024}).toString().replace(/\n```[\s\S]+?\n```\n/g, "");

// Every *.txt file directly inside ./text, joined with a blank line between files.
const text = exports.text = fs.readdirSync("text")
  .filter((filename) => filename.endsWith(".txt"))
  .map((filename) => fs.readFileSync(`text/${filename}`, "utf8"))
  .join("\n\n");

// See the note on DOCS_CMD for why the pattern is quoted and `-exec` is used.
const CODE_CMD = "find . -type f -name '*.js' -exec cat {} +";
const code = exports.code = execSync(CODE_CMD, EXEC_OPTIONS).toString();

// FIXME (no source, incorrect weights)
// Keys are Latin-letter sequences; the trailing comment on each entry shows
// the Cyrillic trigram it stands for (presumably the same keystrokes on a
// Russian keyboard layout -- confirm).
const POPULAR_TRIGRAMS = {
  cnd: 80000000, // ств
  cnj: 59623899, // сто
  tyj: 27088636, // ено
  yjd: 19494469, // нов
  njd: 13977786, // тов
  jdj: 11059185, // ово
  tdf: 10141992, // ева
  jdf: 10141992, // ова
};

// FIXME (no source, incorrect weights)
// Same key/comment convention as POPULAR_TRIGRAMS, for two-letter sequences.
const POPULAR_BIGRAMS = {
  cn: 92535489, // ст
  yj: 87741289, // но
  ty: 54433847, // ен
  nj: 51910883, // то
  yf: 51015163, // на
  jd: 41694599, // ов
  yb: 37466077, // ни
  hf: 33802063, // ра
  dj: 32967758, // во
  rj: 31830493, // ко
};

// The helpers below are function declarations, so they are hoisted and can be
// called here before their definitions appear.
const trigrams = exports.trigrams = generate_text_from(POPULAR_TRIGRAMS);
const bigrams = exports.bigrams = generate_text_from(POPULAR_BIGRAMS);

/**
 * Builds a space-separated text in which each key of `dictionary` appears a
 * number of times equal to its rounded percentage share of the total weight,
 * first as written and then again with its first letter upper-cased.
 *
 * Entries whose share rounds to zero (or is not a number, e.g. when every
 * weight is 0) are skipped so the result never contains empty tokens.
 *
 * @param {Object<string, number>} dictionary - token -> weight
 * @returns {string} generated text; "" for an empty dictionary
 */
function generate_text_from(dictionary) {
  const total = Object.values(dictionary).reduce((sum, weight) => sum + weight, 0);

  const tokens = [];
  for (const [key, weight] of Object.entries(dictionary)) {
    const percent = Math.round((weight / total) * 100);
    if (!(percent > 0)) {
      // Covers both 0 and NaN: nothing to emit for this key.
      continue;
    }
    tokens.push(repeat(key, percent));
    tokens.push(repeat(titleize(key), percent));
  }

  return tokens.join(" ");
}

/**
 * Returns `string` repeated `times` times, separated by single spaces.
 *
 * @param {string} string - token to repeat
 * @param {number} times - repetition count; non-positive or non-finite
 *   values yield "", fractional values are rounded up
 * @returns {string}
 */
function repeat(string, times) {
  const count = Number.isFinite(times) && times > 0 ? Math.ceil(times) : 0;
  return Array(count).fill(string).join(" ");
}

/**
 * Upper-cases the first character of `str` and leaves the rest unchanged.
 *
 * @param {string} str
 * @returns {string} "" when `str` is empty
 */
function titleize(str) {
  return str.charAt(0).toUpperCase() + str.slice(1);
}