Data using simple preprocessing

This commit is contained in:
i.ortega 2020-05-03 01:28:33 +02:00
parent 68e5f235a1
commit b5f573f927
2 changed files with 504143 additions and 0 deletions

504139
data/eu_train_simple.tsv Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env sh
#
# Strip leading punctuation from each input line, collapse adjacent
# duplicate lines, and join every two consecutive lines into one
# tab-separated row written to standard output.
#
# Usage: <script> [FILE]
#   FILE  input to read; standard input is used when omitted or empty.
#
# Exit status: non-zero when FILE is unreadable or is a directory;
# otherwise the status of the final `paste` stage.

main() {
  # Validate the argument before starting the pipeline: a failed input
  # redirection only fails the first stage, and the pipeline's status is
  # that of `paste`, so the script would otherwise exit 0 with no output.
  if [ -n "${1:-}" ]; then
    if [ ! -r "$1" ] || [ -d "$1" ]; then
      printf '%s: cannot read input file: %s\n' "${0##*/}" "$1" >&2
      return 1
    fi
  fi

  # sed expressions, applied in order to every line:
  #   s|^--?||  remove one or two leading hyphens
  #   s|^"?||   remove a leading double quote, if present
  #   s|—||     remove the first em dash anywhere on the line
  #   s|―||     remove the first horizontal bar anywhere on the line
  #   s|^_||    remove a leading underscore
  #   s|^ ||    remove a single leading space left at the start
  #
  # NOTE(review): `uniq` runs before the lines are paired, so dropping an
  # adjacent duplicate shifts the pairing of every following line —
  # confirm this is intended for the input data.
  sed -E 's|^--?||;s|^"?||;s|—||;s|―||;s|^_||;s|^ ||' < "${1:-/dev/stdin}" |
    uniq |
    paste - -
}

main "$@"