eu data and the shell scripts used to create it
This commit is contained in:
commit
68e5f235a1
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env sh

# Scratch file that holds the intermediate cleaned text between the first
# cleaning pass and the iterative line-joining pass further down.
# $temp is only the mktemp template; $tempname is the file actually created.
temp="/tmp/clean-hp-tempXXXXXXXXXX"
tempname=$(mktemp "$temp") || exit 1

# Remove the scratch file on every exit path. The cleanup must target
# $tempname (the real file), not $temp (the template, which never exists).
trap 'rm -f "$tempname"' EXIT
# A bare cleanup handler would let the script carry on after the signal;
# exiting explicitly stops the script and lets the EXIT trap do the cleanup.
trap 'exit 1' INT TERM QUIT
|
||||||
|
|
||||||
|
# quote_removal [FILE]
#
# Reads lines from FILE (default: standard input) and glues together quoted
# passages that were split across several lines.
#
# A line starting with '"' that does not end with '"' (optionally followed by
# one punctuation character) opens a pending quote. Following lines are
# appended to it, separated by one space and with their own leading '"'
# dropped, until the merged text has the form "…" with no inner quotes; the
# merged text is then printed as a single line.
#
# A self-contained quoted line is printed directly, with a closing '"' that
# precedes the final punctuation character removed. Any other line is printed
# unchanged, after flushing a pending quote that it did not close.
#
# Globals written: previous_line, merge, line (POSIX sh has no `local`).
# Output: the rewritten lines on standard output.
quote_removal() {
    previous_line=""
    merge=""

    # IFS is deliberately left at its default so `read` keeps trimming
    # leading/trailing blanks (the end-of-line patterns below rely on it).
    # The `|| [ -n "$line" ]` keeps a final line that has no trailing newline.
    while read -r line || [ -n "$line" ]; do
        # A quote is pending: tentatively append the current line to it.
        case "$previous_line" in
            '"'*)
                case "$line" in
                    '"'*) merge="$previous_line ${line#?}" ;;
                    *)    merge="$previous_line $line" ;;
                esac
                ;;
        esac

        # printf is used instead of echo throughout: echo swallows lines such
        # as "-n" and may interpret backslashes depending on the shell.
        if printf '%s\n' "$merge" | grep -Eq '^"[^"]+"[\.!]?$'; then
            # The merged text is now one complete quote: emit it.
            printf '%s\n' "$merge"
            previous_line=""
        else
            case "$line" in
                '"'*)
                    if [ -n "$merge" ]; then
                        # Still inside the pending quote: keep accumulating.
                        previous_line="$merge"
                    elif printf '%s\n' "$line" | grep -Eq '"[\.!]?$'; then
                        # Self-contained quoted line: drop the closing quote
                        # that sits before the final punctuation character.
                        printf '%s\n' "$line" | sed -E 's|(")([\.!])$|\2|'
                        previous_line=""
                    else
                        # Opening line of a new multi-line quote.
                        previous_line="$line"
                    fi
                    ;;
                *)
                    if printf '%s\n' "$line" | grep -Eq '"[\.!]?$'; then
                        # The line closes a quote. Emit the merged text, or
                        # the line itself when nothing was pending, rather
                        # than an empty line that would lose the content.
                        printf '%s\n' "${merge:-$line}"
                        previous_line=""
                    else
                        # Ordinary line: give up on any pending quote and
                        # print both pieces on their own lines.
                        if [ -n "$previous_line" ]; then
                            printf '%s\n' "$previous_line"
                        fi
                        printf '%s\n' "$line"
                        previous_line=""
                    fi
                    ;;
            esac
        fi

        merge=""
    done < "${1:-/dev/stdin}"

    # Flush a quote that was still open when the input ended so that its
    # text is not silently discarded.
    if [ -n "$previous_line" ]; then
        printf '%s\n' "$previous_line"
    fi
}
|
||||||
|
|
||||||
|
# Pass 1: strip dialogue markers from each input line (a leading "-" or "--",
# a leading en dash optionally preceded by a double quote, the first em dash,
# the first horizontal bar, a leading underscore, a leading space), collapse
# adjacent duplicate lines, merge split quotes, and finally drop any leading
# double quotes plus one following space. The result goes to the scratch file.
sed -E 's|^--?||;s|^"?–||;s|—||;s|―||;s|^_||;s|^ ||' < "${1:-/dev/stdin}" \
    | uniq \
    | quote_removal \
    | sed -E 's|^"+ ?||' > "$tempname"

# Pass 2: join every line ending in a comma with the line after it.
# One sed run only joins pairs, so until-no-change.sh re-applies the step
# command to its own output until the output stops changing.
join_after_comma='/,$/ {N; s/\n/ /g;}'
sh until-no-change.sh \
    "sed '$join_after_comma' < '$tempname' | uniq" \
    "sed \"$join_after_comma\" | uniq"

rm "$tempname"
|
|
@ -0,0 +1,3 @@
|
||||||
|
#!/usr/bin/env sh

# Clean the input (file named by $1, or standard input when no argument is
# given) with clean.sh, then put every two consecutive output lines side by
# side on one line, separated by a tab.
input="${1:-/dev/stdin}"

sh clean.sh < "$input" | paste - -
|
|
@ -0,0 +1,12 @@
|
||||||
|
#!/usr/bin/env sh

# Usage: until-no-change.sh INITIAL_CMD STEP_CMD
#
# Runs INITIAL_CMD (via sh -c) to produce a first output, then repeatedly
# feeds the latest output on standard input to STEP_CMD (via sh -c) until
# STEP_CMD's output is identical to its input. The final output is written
# to standard output.

current=$(mktemp '/tmp/until-no-change-currentXXXXXXX') || exit 1
prev=$(mktemp '/tmp/until-no-change-prevXXXXXXX') || { rm -f "$current"; exit 1; }

# Both scratch files are removed on every exit path; the signal trap exits
# so that the EXIT trap runs when the script is interrupted.
trap 'rm -f "$current" "$prev"' EXIT
trap 'exit 1' INT TERM QUIT

sh -c "$1" > "$current"

# $prev starts out empty, so the loop body runs at least once whenever
# INITIAL_CMD produced any output. cmp -s compares silently via exit status.
until cmp -s "$current" "$prev"; do
    cp "$current" "$prev"
    # Plain redirections on the child shell replace the original
    # "cat file | cmd > file" string, so the temp file names no longer have
    # to be spliced into the command text.
    sh -c "$2" < "$prev" > "$current"
done

cat "$current"
|
|
@ -0,0 +1,3 @@
|
||||||
|
torch==1.1.0
|
||||||
|
torchtext==0.5.0
|
||||||
|
tqdm
|
Loading…
Reference in New Issue