alchi/src/alchi-book/scripts/translate.js

744 lines
26 KiB
JavaScript

// TODO translate svg files
// TODO refactor src/pages/*.html to use <lang.de> instead of <de> etc. -> make tags unambiguous
// e.g. <tr> can be turkish or table row
// TODO avoid double inserts
// TODO verify: all text-fragments are inserted
// TODO auto-detect sourceLangList from files in src/pages/*.html
// TODO post-process old and manual translations
// -> add/update rev="en#xxxxxxxx" (revision ID) to keep translations in sync
// use different encoding than base64? base32 or base16 (hex) -> better for filenames
// -> easier to build a content-addressable store to cache old versions
// con: text fragments are small -> use one large database file, e.g. jsonlines format
// collision safety? git uses short IDs of only 7 chars in base16
// 12 safe symbols: []^'*-/_{}<>
const codeNumKey = "^'*-/_{}<>"; // 10 digits
const codeNumRegexCharClass = "\\^'*-/_{}<>"; // escape ^ for regex character class
const removeRegexCharClass = [
'\u200B', // ZERO WIDTH SPACE from google https://stackoverflow.com/questions/36744793
].join('');
const artifactsRegexCharClass = removeRegexCharClass + [
' ', // space
].join('');
const codeNumRegexCharClassImport = codeNumRegexCharClass + artifactsRegexCharClass;
const encodeNumTable = Object.fromEntries(codeNumKey.split('').map((c, i) => [i, c]));
function encodeNum(num) {
return num.toString().split('').map(i => encodeNumTable[i]).join('');
}
const decodeNumTable = Object.fromEntries(codeNumKey.split('').map((c, i) => [c, i]));
function decodeNum(str) {
return parseInt(str.replace(/\s+/sg, '').split('').map(c => decodeNumTable[c]).join(''));
}
const dryRunExport = 0;
const dryRunImport = 0;
const showDebug = 1;
const charLimit = 5000; // limit of google, deepl
//const charLimit = 1000; // good page size for manual translations or debugging
let useXml = false;
let translatorName = 'google';
function main() {
const argv = process.argv.slice(1); // argv[0] is node
const langMap = {
zh: 'zh-CN', // simplified chinese
};
function getLang(str) {
if (str && str in langMap) return langMap[str];
return str;
}
const sourceLang = getLang(argv[1]);
const targetLang = getLang(argv[2]);
const inputFile = argv[3];
//const translatorName = 'google';
translatorName = (
translatorLangs.deepl.includes(targetLang) ? 'deepl' :
'google'
);
// DEBUG
translatorName = 'google';
// xml is broken in all translators
// -> encode to "symbols in square braces"
// which are preserved by all translators
const xmlTranslators = [
//'deepl', // not really. some xml is preserved, some xml is translated -> not usable
];
useXml = xmlTranslators.includes(translatorName);
if (sourceLang && targetLang && inputFile && fs.existsSync(inputFile)) {
importLang(sourceLang, targetLang, inputFile);
}
else if (sourceLang && targetLang) {
exportLang(sourceLang, targetLang);
}
else {
showHelp();
}
}
function showHelp() {
const scriptName = 'scripts/' + __filename.split('/').pop();
console.log(
'usage:\n' +
`node ${scriptName} <sourceLang> <targetLang>\n` +
`node ${scriptName} <sourceLang> <targetLang> <translationFile>\n` +
'\n' +
'sample:\n' +
`node ${scriptName} de en # from source files, generate translate-de2en.html\n` +
`# manually create translate-de2en.txt\n` +
`node ${scriptName} de en translate-de2en.txt # add <en auto t="${nowDate}">...</en> tags to source files\n` +
`# manually fix the translations, and replace <en auto t="${nowDate}"> with <en>\n`
//`node ${scriptName} translate-de2en.txt en\n`
)
}
const translatorLangs = {
deepl: [
// 2021-05-25
'bg', 'zh', 'cs', 'da', 'nl', 'et', 'fi', 'fr', 'de', 'el', 'hu', 'it', 'ja',
'lv', 'lt', 'pl', 'pt', 'pt-PT', 'pt-BR', 'ro', 'ru', 'sk', 'sl', 'es', 'sv'
],
};
const previewTextLength = 500;
const fs = require('fs');
const appRoot = require('app-root-path').path;
const path = require('path');
const glob = require('fast-glob');
const { parse } = require('node-html-parser'); // patched version github:taoqf/node-fast-html-parser#60ea8fee51f07fbc712b5642a0496f12748eb90f
const htmlEntities = require('he');
const elevConf = require(appRoot + '/config/eleventy.config.js')();
process.chdir(appRoot);
const scriptPath = path.relative(appRoot, process.argv[1]);
const inputDir = elevConf.dir.input;
const infilesGlob = inputDir + '/pages/*.html';
const sourceLangList = ['de', 'en']; // TODO get from 11ty metadata
// https://github.com/iansan5653/unraw/issues/29
// deepl.com:
// / -> \/
// \ -> \\
function deeplBackslashEncode(str) {
let res = '';
for (let i = 0; i < str.length; i++) {
const char16bit = str[i];
const code = char16bit.charCodeAt(0);
res += (
(code == 47) ? '\\/' : // forward slash
(code == 92) ? '\\\\' : // backslash
char16bit
);
}
return res;
}
function dateTime(date = null) {
// sample result: '2021-03-21.21-05-36'
if (!date) date = new Date();
return date.toLocaleString('lt').replace(/:/g, '-').replace(' ', '.');
}
const nowDate = dateTime();
const crypto = require("crypto");
function sha1sum(str) {
return crypto.createHash("sha1").update(str).digest("base64");
}
// google can translate -- to -
// so we use "safe" ids without repetition
function getNextSafeId(lastId) {
for (let id = (lastId + 1); ; id++) {
let idStr = id.toString();
let idSafe = true;
for (let charIdx = 0; charIdx < (idStr.length - 1); charIdx++) {
if (idStr[charIdx] == idStr[charIdx + 1]) {
// found repetition
idSafe = false;
//if (showDebug) console.log(`skip unsafe id ${id}`);
break;
}
}
if (idSafe) return id;
}
}
/////////////////////// export ////////////////////////////
function exportLang(sourceLang = 'de', targetLang = 'en') {
// TODO more dynamic ...
// check src/pages/*.html if sourceLang is found
if (sourceLangList.includes(sourceLang) == false) {
console.log(`error: sourceLang ${sourceLang} not found. must be one of: ${sourceLangList.join(', ')}`);
process.exit(1);
}
const htmlFile = `translate-${sourceLang}2${targetLang}.html`;
if (!dryRunExport && fs.existsSync(htmlFile)) {
console.log(`error: output file exists: ${htmlFile}`);
console.log(`\nsolutions:`);
console.log(`mv ${htmlFile} ${htmlFile}.${nowDate}.bak`);
console.log(`rm ${htmlFile}`);
process.exit(1);
}
console.log(`glob: ${infilesGlob}`);
const textParts = [];
//const replacementList = [];
const replacementData = {};
replacementData.replacementList = {}; // sparse array due to "safe" ids, see getNextSafeId
//replacementData.indentList = [];
replacementData.lastId = -1;
function fmtNum(num) {
// split long number in groups of three digits
// https://stackoverflow.com/a/6786040/10440128
return `${num}`.replace(/(\d)(?=(\d{3})+$)/g, '$1 ');
}
function getReplace(match) {
// global: replacementData
const replacementId = getNextSafeId(replacementData.lastId);
replacementData.lastId = replacementId;
replacementData.replacementList[replacementId] = {};
replacementData.replacementList[replacementId].value = match;
replacementData.replacementList[replacementId].code = encodeNum(replacementId);
replacementData.replacementList[replacementId].indentList = [];
return `\n[${encodeNum(replacementId)}]\n`;
}
// loop input files
glob.sync(infilesGlob)
//.slice(0, 2) // debug: process less input files
.forEach((file, fileIdx) => {
console.log(`input: ${file}`);
const inputHtml = fs.readFileSync(file, 'utf8');
const root = parse(inputHtml);
const parentNodes = root.querySelectorAll('langs, .langs');
// loop parentNodes -> get textParts
for (const [pi, p] of parentNodes.entries()) {
// loop nodes
p.querySelectorAll(`${sourceLang}, *[lang="${sourceLang}"]`).forEach((n, ni) => {
const wrap = (n.hasAttribute('lang') == false);
const nodeStart = n._source.start; // only in patched version of html parser
const lineStart = inputHtml.lastIndexOf('\n', n._source.start) + 1;
const indent = inputHtml.slice(lineStart, nodeStart).match(/^\s*/)[0];
//if (showDebug) console.log(`indent = ${JSON.stringify(indent)}`);
//const tagName = targetLang;
const tagName = `lang.${targetLang}`;
//const tagAttrs = `generator="${translatorName}" t="${nowDate}"`;
// base of translation = sourceText
const base = `${sourceLang}#${sha1sum(n.innerHTML).slice(0, 8)}`;
const extraAttrs = `rev="${base}"`; // add revision ID
// TODO properly parse + replace attributes if wrap == false
const sBase = indent + (wrap
? `<${tagName} ${extraAttrs}>${n.innerHTML}</${tagName}>`
: n.outerHTML.replace(new RegExp(`^<([^>\\s]+)\\s+[^>]*lang="${sourceLang}"[^>]*>`, 's'), `<$1 lang="${targetLang}" ${extraAttrs}>`)
);
if (showDebug) console.dir({ indent, wrap, tagName, extraAttrs });
const sXml = `<html f="${fileIdx}" p="${pi}" n="${ni}">\n${sBase}\n</html>`;
if (showDebug) console.log(`textPart before replace:\n${sXml}`);
// encode html
// replace with "symbols in square braces"
// consume all whitespace around the original value
let textPart = sXml.replace(
new RegExp(
[
`\\s*`, // space before
`(?:`,
`\\[[${codeNumRegexCharClassImport}]+\\]`, // "symbols in square braces"
`|`,
`\\n{2,}`, // extra newlines: needed for transliterated translations
`|`,
`<.+?>`, // html tags
`|`,
`&[^ ]+;`, // html entities
`)`,
`\\s*` // space after
].join(''),
'sg'
),
match => getReplace(match)
);
// encode indents between replacements
// use lookahead (?=...) to include delimiter as prefix
if (1) {
textPart = (
textPart
.split(new RegExp(`(?=\\n\\[[${codeNumRegexCharClass}]+\\]\\n)`))
.map(str => {
//console.dir({ str });
let [_, replacement, idxStr, rest] = str.match(new RegExp(`(\\n\\[([${codeNumRegexCharClass}]+)\\]\\n)(.*)$`, 's'));
const replaceId = decodeNum(idxStr);
//console.dir({ rest });
//replacementData.indentList[replaceId] = [];
replacementData.replacementList[replaceId].indentList = [];
// remove indents
rest = rest.split('\n').map(line => {
const [_, indent, lineRest] = line.match(/^(\s*)(.*)/);
replacementData.replacementList[replaceId].indentList.push(indent);
return lineRest;
}).join('\n');
return replacement + rest;
})
).join('');
}
if (showDebug) console.log(`textPart after replace:\n${textPart}`);
textParts.push(textPart);
})
} // done loop parentNodes
//console.dir(replacementData.indentList);
//if (fileIdx > 1) process.exit(0); // DEBUG
}); // done loop input files
if (1 && showDebug) {
for (const id of Object.keys(replacementData.replacementList)) { // sparse array
console.log(`[${encodeNum(id)}] = id ${id} = ${replacementData.replacementList[id].value}`)
}
}
if (dryRunExport) return;
// generate links
let lastGroupSize = 0;
const textGroups = (
textParts.reduce((acc, val) => {
const nextLen = acc[acc.length - 1].length + val.length + 3*(`\n\n<meta attrrrrrrrr="vallll"/>\n\n`.length);
if (nextLen >= charLimit) {
acc.push('');
lastGroupSize = 0;
}
acc[acc.length - 1] += val + '\n\n';
lastGroupSize++;
return acc;
}, [''])
/* DEBUG is this broken?
// group siblings
.map(textGroup => textGroup.replace(/\n(?:\[[\d ]+\]\s*){2,}\n/sg, matchStr => {
const replaceIdList = [];
// preserve extra whitespace between replacements
matchStr.replace(/(\s*)\n\[([\d ]+)\]\n(\s*)/g, (_, spaceBefore, idStr, spaceAfter) => {
const replaceId = parseInt(idStr.replace(/ /g, ''));
if (0 && showDebug) console.dir({ replaceId }); // verbose
replacementList[replaceId] = (
spaceBefore + replacementList[replaceId] + spaceAfter
);
replaceIdList.push(replaceId);
});
if (0 && showDebug) console.dir({ matchStr, replaceIdList }); // verbose
const firstId = replaceIdList.shift();
// move all replacements to firstId
for (const replaceId of replaceIdList) {
replacementList[firstId] += replacementList[replaceId];
replacementList[replaceId] = '';
}
return `\n[${fmtNum(firstId)}]\n`;
}))
*/
);
if (showDebug) {
console.log(textGroups.map((s, i) => `textGroup ${i}:\n${s}\n`).join('\n'));
}
const translateUrl = t => (
translatorName == 'google' ? `https://translate.google.com/?sl=${sourceLang}&tl=${targetLang}&text=${encodeURIComponent(t)}&op=translate` :
translatorName == 'deepl' ? `https://www.deepl.com/translator#${sourceLang}/${targetLang}/${encodeURIComponent(deeplBackslashEncode(t))}` :
'#invalid-translatorName'
);
const translateLinks = textGroups.map(t => (
`<li><a target="_blank" href="${translateUrl(t)}">${htmlEntities.encode(t.slice(0, previewTextLength/2))} ... ${htmlEntities.encode(t.slice(-previewTextLength/2))}</a></li>`
));
const htmlSrc = (
'<style>' +
'a:visited { color: green; }' +
'a { text-decoration: none; }' +
'a:hover { text-decoration: underline; }' +
'li { margin-bottom: 1em; }' +
'</style>' +
'<ol>\n\n' + translateLinks.join('\n\n') + '</ol>\n' +
// embed replacements in html comment
'<!-- replacementData = ' +
JSON.stringify(replacementData, null, 2) +
' = replacementData -->'
);
fs.writeFileSync(htmlFile, htmlSrc, 'utf8');
const htmlFileUrl = encodeURI('file://' + path.resolve(htmlFile));
console.log(`output: ${htmlFile}`);
console.log(`
next steps:
1. open in your browser:
${htmlFileUrl}
2. click the first link
3. fix the translation on the translator website,
so the translator can learn to translate better
4. scroll down, on the bottom right, click on: copy translation
5. paste the translation to your text editor
remove the footers:
Translated with www.DeepL.com/Translator (free version)
6. repeat for all links (append translations to text file)
7. save the text file, for example as translate-${sourceLang}2${targetLang}.txt
8. run this script again with the text file, for example:
node ${scriptPath} ${sourceLang} ${targetLang} translate-${sourceLang}2${targetLang}.txt
9. add the new language code to src/_data/metadata.js -> metadata.languages
10. restart the dev server (to reload metadata.js. hot reload is not working here)
11. commit the new translation:
git add src/pages/ src/_data/metadata.js
git commit -m 'alchi-book: add ${targetLang} translation'
12. commit the new build:
# stop the dev server
npm run build
git add build/
git commit -m 'alchi-book: update build'
note:
translators will change the order of words,
so in some cases, html markup tags like <b>....</b>
will be in a wrong position.
note:
the ${htmlFile} file is valid only for one iteration.
if you added nodes to the html files,
then you must generate a new ${htmlFile} file
`)
}
/////////////////////// import ////////////////////////////
function importLang(sourceLang, targetLang, inputFile) {
let input = fs.readFileSync(inputFile, 'utf8');
// remove unwanted characters
input = input.replace(new RegExp(`[${removeRegexCharClass}]`, 'g'), '');
// decode replacements
const htmlFile = `translate-${sourceLang}2${targetLang}.html`;
if (fs.existsSync(htmlFile) == false) {
console.log(`error: html file not found: ${htmlFile}`);
console.log('this file is required to decode replacements');
process.exit(1);
}
const htmlSrc = fs.readFileSync(htmlFile, 'utf8');
const replacementDataMatch = htmlSrc.match(/<!-- replacementData = (.*) = replacementData -->/s);
if (replacementDataMatch == null) {
console.log(`parse error: replacementData not found in ${htmlFile}`);
process.exit(1);
}
const replacementData = JSON.parse(replacementDataMatch[1]);
console.log(`loaded ${Object.keys(replacementData.replacementList).length} replacements from ${htmlFile}`);
if (1 && showDebug) {
for (const id of Object.keys(replacementData.replacementList)) { // sparse array
console.dir({ id, replacement: replacementData.replacementList[id] })
console.log(`[${encodeNum(id)}] = id ${id} = ${replacementData.replacementList[id].value}`)
}
}
// quickfix to restore newlines around replacements
input = ('\n' + input + '\n');
// decode replacements and indents
// copy pasta ...
// use lookahead (?=...) to include delimiter as prefix
// \n? -> allow missing newlines around replacements -> parse errors! -> revoke
let lastReplaceId = -1;
input = (
input
.split(new RegExp(`(?=\\n\\[[${codeNumRegexCharClassImport}]+\\]\\n)`)) // add \n to match
.map((str, blockIdx) => {
console.dir({ blockStr: str });
const m = str.match(new RegExp(`(\\n?\\[([${codeNumRegexCharClassImport}]+)\\]\\n?)(.*)$`, 's')); // optional \n
//const m = str.match(new RegExp(`(\\n\\[([${codeNumRegexCharClassImport}]+)\\]\\n)(.*)$`, 's')); // require \n
if (!m) return str; // no replace
let [_, _replacement, idxStr, rest] = m;
const replaceId = decodeNum(idxStr);
// test for steadiness = simple validation
if (replaceId < lastReplaceId) {
console.log(`error: replaceId is not steady. did the translator break our code?`)
console.log(`lastReplaceId = ${lastReplaceId}`)
console.log(`replaceId = ${replaceId}`)
}
if (showDebug) console.dir({ _replacement, idxStr, replaceId, rest });
if (!replacementData.replacementList[replaceId]) {
console.log(`error: invalid replaceId ${replaceId}. did the translator break our code?`)
console.dir({ _replacement, idxStr, replaceId, rest });
console.log(`last valid replaceId was ${lastReplaceId}`)
// TODO show context in sourceText?
// we are looking for the original replacement-code
// TODOODODOODODOODOD
const nextSafeId = getNextSafeId(lastReplaceId);
console.log(`next safe replaceId would be ${nextSafeId}:`)
console.dir(replacementData.replacementList[nextSafeId])
console.log(`wild guess: the translator translated our code ${idxStr} to ${replacementData.replacementList[nextSafeId].code}`)
console.dir({ originalCode: replacementData.replacementList[nextSafeId].code, translatedCode: idxStr });
console.log(`error: failed to decode replacements`);
process.exit(1); // fatal error
// its a bad solution to guess the right replaceId
// cos in many cases, this will fail silently, and produce errors in the result
// we can reduce the change of "false positives"
// by making the replacement ids more sparse, or random
}
if (replacementData.replacementList[replaceId].consumed) {
console.log(`error: duplicate replaceId ${replaceId}`);
console.dir({ _replacement, idxStr, replaceId, rest });
console.log(`replacement was already consumed in:`);
console.dir(replacementData.replacementList[replaceId].consumedBy);
console.log(`error: failed to decode replacements`);
process.exit(1); // fatal error
}
// restore indents
// FIXME make this more robust against newline removing (transliterated translations)
//console.dir({ indentsForBlock: replacementData.indentList[replaceId] });
rest = rest.split('\n').map((lineRest, lineIdx) => {
//const indent = replacementData.indentList[replaceId][lineIdx] || '';
const indent = replacementData.replacementList[replaceId].indentList[lineIdx] || '';
//console.dir({ replaceId, lineIdx, indent, lineRest });
return indent + lineRest;
}).join('\n');
// decode replacement
const replacement = replacementData.replacementList[replaceId].value;
replacementData.replacementList[replaceId].consumed = true; // prevent collisions
replacementData.replacementList[replaceId].consumedBy = { blockIdx, blockStr: str };
//console.dir({ replaceId, replacement, rest });
lastReplaceId = replaceId;
return replacement + rest;
})
).join('');
if (showDebug) {
console.log('decoded html:');
console.log(input);
//return; // DEBUG
}
// validate html
const numOpen = input.match(/<html/g).length;
const numClose = input.match(/<\/html>/g).length;
if (numOpen != numClose) {
console.log(`ERROR: html tags mismatch in ${inputFile}:`);
console.log(`open <html> tags: ${numOpen}`);
console.log(`close </html> tags: ${numClose}`);
// locate first mismatch
// assume: html tags are not nested
const idxList = {};
input.replace(/<html/g, (_, idx) => (idxList[idx] = 1));
input.replace(/<\/html>/g, (_, idx) => (idxList[idx] = 0));
let level = 0;
// this is a mess :D
const idxIdxList = Object.keys(idxList);
//for (const idxStr of Object.keys(idxList)) {
for (let idxIdx = 0; idxIdx < idxIdxList.length; idxIdx++) {
const idxStr = idxIdxList[idxIdx];
const isOpen = (idxList[idxStr] == 1);
if (isOpen) level++;
else level--;
if (level < 0 || 1 < level) {
const idx = parseInt(idxStr);
// TODO handle idxIdx out of range ...
const last2Idx = parseInt(idxIdxList[idxIdx - 2]);
const lastIdx = parseInt(idxIdxList[idxIdx - 1]);
const nextIdx = parseInt(idxIdxList[idxIdx + 1]);
const next2Idx = parseInt(idxIdxList[idxIdx + 2]);
const m = '='.repeat(30);
console.log(`html tags mismatch starts at position ${idxStr}. context:`);
console.log(`${m} pos -2 = ${last2Idx} ${m}`);
console.log(input.slice(last2Idx, lastIdx));
console.log(`${m} pos -1 = ${lastIdx} ${m}`);
console.log(input.slice(lastIdx, idx));
console.log(`${m} pos 0 = ${idxStr} ${m}`);
console.log(input.slice(idx, nextIdx));
console.log(`${m} pos +1 = ${nextIdx} ${m}`);
console.log(input.slice(nextIdx, next2Idx));
console.log(`${m} pos +2 = ${next2Idx} ${m}`);
break; // show only first
}
}
console.log(`fatal error: html tags mismatch. translation was not imported`)
process.exit(1);
}
// parse
const textParts = [];
const inputRest = input.replace(
/<html f="([0-9]+)" p="([0-9]+)" n="([0-9]+)">\n(.+?)\n<\/html>/sg,
(_, file, part, node, str) => {
file = parseInt(file);
part = parseInt(part);
node = parseInt(node);
textParts.push({ file, part, node, str });
return ''; // remove from input
}
);
// should be empty ...
const inputRestTrimmed = inputRest.trim();
if (inputRestTrimmed.length > 0) {
console.log(`inputRest = ${JSON.stringify(inputRestTrimmed.replace(/\n+/sg, '\n'))}`);
}
const backupDir = `backup/translate/${nowDate}.${sourceLang}2${targetLang}`;
const rollbackScriptPath = `${backupDir}/rollback.sh`;
let rollbackScript = [
'#!/usr/bin/env bash',
'# rollback translation',
'shopt -s expand_aliases',
`alias mv='mv --verbose'`,
'',
].join('\n');
const diffScriptPath = `${backupDir}/diff.sh`;
let diffScript = [
'#!/usr/bin/env bash',
'# diff translation',
'shopt -s expand_aliases',
`alias diff='diff --unified --color=auto'`,
'',
].join('\n');
console.log(`glob: ${infilesGlob}`);
// we assume that source files did not change since we called exportLang()
// TODO verify automatic, mismatch is fatal error
let changedFiles = 0;
glob.sync(infilesGlob)
//.slice(0, 2) // debug: process less input files
.forEach((file, fileIdx) => {
//console.log(`input: ${file}`);
const sourceBefore = fs.readFileSync(file, 'utf8');
const root = parse(sourceBefore);
const parentNodes = root.querySelectorAll('langs, .langs');
let insertedNodes = 0;
// loop parentNodes -> get textParts
for (const [pi, p] of parentNodes.entries()) {
p.querySelectorAll(`${sourceLang}, *[lang="${sourceLang}"]`).forEach((n, ni) => {
const textNode = textParts.find(o => (
o.file == fileIdx &&
o.part == pi &&
o.node == ni
));
if (!textNode) {
//console.log(`warning: no imported translation for ${fileIdx} ${pi} ${ni} ${n.toString().slice(0, 100)} ...`);
return; // continue
}
n.insertAdjacentHTML('afterend', '\n' + textNode.str);
insertedNodes++;
if (dryRunImport) {
const indent = textNode.str.match(/^\s*/)[0]; // quick n dirty ...
console.log(`old node:`); console.dir({ ...textNode, str: '', file }); console.log(indent + n.toString())
console.log(`new node:`); console.log(textNode.str)
}
})
}
if (!dryRunImport && insertedNodes > 0) {
// move original to backup
const backupFile = `${backupDir}/${file}`;
fs.mkdirSync(path.dirname(backupFile), { recursive: true });
fs.renameSync(file, backupFile);
console.log(`output: ${backupFile}`);
rollbackScript += `mv ${backupFile} ${file}\n`
diffScript += `diff ${backupFile} ${file}\n`
changedFiles++;
//const outFile = file + '.add-' + targetLang + '.txt'; // use *.txt extension to avoid *.html glob match
const outFile = file; // replace input file (after creating a backup copy)
fs.writeFileSync(outFile, root.toString(), 'utf8');
console.log(`output: ${outFile}`);
}
});
if (!dryRunImport && changedFiles > 0) {
rollbackScript += `mv ${backupDir} ${backupDir}.rolled-back\n`
fs.writeFileSync(rollbackScriptPath, rollbackScript, 'utf8');
console.log(`output: ${rollbackScriptPath}`);
fs.writeFileSync(diffScriptPath, diffScript, 'utf8');
console.log(`output: ${diffScriptPath}`);
}
}
main();