eu data and the shell scripts used to create it
This commit is contained in:
commit
68e5f235a1
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env sh
#
# Text clean-up filter: strips leading dashes/quotes from each line, merges
# quoted passages that were split across lines, and joins lines that end in a
# comma with the line that follows.
#
# Usage: sh clean.sh [FILE]        (reads standard input when FILE is omitted)

# Template handed to mktemp; the trailing X's are replaced with random
# characters, so "$temp" itself is never the name of a real file.
temp="/tmp/clean-hp-tempXXXXXXXXXX"

# Path of the scratch file that was actually created.
tempname=$(mktemp "$temp") || exit 1

# On interruption remove the scratch file (not the template) and stop, rather
# than carrying on with a file that no longer exists.
trap 'rm -f "$tempname"; exit 1' INT TERM QUIT
|
||||
|
||||
# quote_removal [FILE]
#
# Copies FILE (standard input when omitted) to standard output, re-joining
# double-quoted passages that were split over several lines.
#
#   * A line that starts with '"' but is not closed on the same line is held
#     back and joined (with a single space) to the following line(s); a
#     repeated opening '"' on a continuation line is dropped.
#   * Once the joined text looks like one clean quotation ("...", optionally
#     followed by '.' or '!') it is printed.
#   * A self-contained quoted line is printed with the closing '"' removed
#     when it is followed by '.' or '!'.
#   * Every other line is printed unchanged.
#
# Uses the global variables previous_line, merge and line (POSIX sh has no
# 'local').
quote_removal() {
  previous_line="" # text of a quotation that is still open
  merge=""         # previous_line joined with the current line

  # 'read' without IFS= trims surrounding blanks, as the original did.
  # The '|| [ -n "$line" ]' keeps a final line that has no trailing newline.
  while read -r line || [ -n "$line" ]; do
    merge=""

    # Join the current line onto an open quotation, if there is one.
    case "$previous_line" in
      '"'*)
        case "$line" in
          '"'*) merge="$previous_line ${line#\"}" ;;
          *) merge="$previous_line $line" ;;
        esac
        ;;
    esac

    # printf is used instead of echo so that lines such as "-n" or ones
    # containing backslashes are passed through verbatim.
    if printf '%s\n' "$merge" | grep -Eq '^"[^"]+"[\.!]?$'; then
      # The joined text is now one well-formed quotation.
      printf '%s\n' "$merge"
      previous_line=""
    else
      case "$line" in
        '"'*)
          if [ -n "$merge" ]; then
            # Still inside an open quotation: keep accumulating.
            previous_line="$merge"
          elif printf '%s\n' "$line" | grep -Eq '"[\.!]?$'; then
            # Quotation opened and closed on this line.
            printf '%s\n' "$line" | sed -E 's|(")([\.!])$|\2|'
            previous_line=""
          else
            # A new quotation opens here.
            previous_line="$line"
          fi
          ;;
        *)
          if printf '%s\n' "$line" | grep -Eq '"[\.!]?$'; then
            # Closing line. When nothing was pending, $merge is empty;
            # print the line itself instead of an empty line.
            printf '%s\n' "${merge:-$line}"
            previous_line=""
          else
            # Not part of a quotation: release anything held back, then
            # the line itself.
            [ -n "$previous_line" ] && printf '%s\n' "$previous_line"
            printf '%s\n' "$line"
            previous_line=""
          fi
          ;;
      esac
    fi
  done < "${1:-/dev/stdin}"

  # A quotation still open at end of input would otherwise be lost.
  if [ -n "$previous_line" ]; then
    printf '%s\n' "$previous_line"
    previous_line=""
  fi
}
|
||||
|
||||
# Directory containing this script, so the helper script below is found no
# matter which directory the caller is in.
script_dir=$(dirname "$0")

# Pass 1: per line, drop one or two leading hyphens, a leading en dash
# (optionally preceded by a '"'), the first em dash, the first horizontal
# bar, a leading underscore and a leading space; collapse adjacent duplicate
# lines; re-join split quotations; finally strip leading '"' characters and
# one following space. The result is stored in the scratch file.
sed -E 's|^--?||;s|^"?–||;s|—||;s|―||;s|^_||;s|^ ||' < "${1:-/dev/stdin}" |
  uniq | quote_removal | sed -E 's|^"+ ?||' > "$tempname"

# Pass 2: join every line that ends in a comma with the line after it, and
# repeat until another pass no longer changes anything. The first command
# produces the initial text from the scratch file; the second is the step
# that is re-applied to the previous result.
sh "$script_dir/until-no-change.sh" \
  "sed '/,$/ {N; s/\n/ /g;}' < '$tempname' | uniq" \
  'sed "/,$/ {N; s/\n/ /g;}" | uniq'

rm -f "$tempname"
|
|
@@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env sh
#
# Runs clean.sh on FILE (standard input when omitted) and places every two
# consecutive output lines side by side on one line, separated by a tab.
#
# Usage: sh clean-paste.sh [FILE]

# Locate clean.sh next to this script instead of relying on the caller's
# current directory.
script_dir=$(dirname "$0")

sh "$script_dir/clean.sh" < "${1:-/dev/stdin}" | paste - -
|
|
@@ -0,0 +1,12 @@
|
|||
#!/usr/bin/env sh
#
# Runs a command once, then keeps piping the result through a second command
# until the output stops changing, and prints that final output.
#
# Usage: sh until-no-change.sh INITIAL_CMD STEP_CMD
#   INITIAL_CMD  shell command whose standard output is the starting text
#   STEP_CMD     shell command (a filter: stdin -> stdout) applied repeatedly
#
# Both arguments are executed with 'sh -c', so only pass trusted commands.
# The loop ends only when STEP_CMD reaches a fixed point; a filter whose
# output keeps changing will run forever.

# until_no_change INITIAL_CMD STEP_CMD
# Implements the behaviour described above. Uses the global variables
# 'current' and 'prev' for the two scratch files (POSIX sh has no 'local').
until_no_change() {
  current=$(mktemp '/tmp/until-no-change-currentXXXXXXX') || return 1
  prev=$(mktemp '/tmp/until-no-change-prevXXXXXXX') || {
    rm -f "$current"
    return 1
  }

  # Never leave the scratch files behind, including on interruption.
  trap 'rm -f "$current" "$prev"' EXIT
  trap 'exit 1' INT TERM QUIT

  sh -c "$1" > "$current"

  # "$prev" starts out empty, so an empty initial result ends immediately.
  until cmp -s "$current" "$prev"; do
    cp "$current" "$prev"
    sh -c "cat '$prev' | $2 > '$current'"
  done

  cat "$current"
  rm -f "$current" "$prev"
}

until_no_change "$@"
|
|
@@ -0,0 +1,3 @@
|
|||
torch==1.1.0
|
||||
torchtext==0.5.0
|
||||
tqdm
|
Loading…
Reference in New Issue