refactor: xmlstarlet -> html-xml-utils

This commit is contained in:
MDLeom 2022-12-01 10:00:32 +00:00
parent e653ba90c6
commit 5a4a8bb9bc
No known key found for this signature in database
GPG Key ID: 32D3E28E96A695E8
1 changed file with 6 additions and 0 deletions

View File

@ -173,6 +173,12 @@ sort -u > "top-1m-tranco.txt"
# ## Append new line https://unix.stackexchange.com/a/31955
# sed '$a\' > "oisd-exclude.txt"
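# # Alternative using html-xml-utils: hxwls lists the links found in the HTML,
# # then the '?w=' query prefix is stripped from each matching link.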
# cat "oisd-exclude.html" | \
# hxwls | \
# grep -F '?w=' | \
# sed 's/^?w=//g' > "oisd-exclude.txt"
# Merge Umbrella, Tranco, Radar and self-maintained top domains into a single
# sorted, de-duplicated list.
# The inputs are handed to sort directly rather than piped through `cat`:
# cat joins files byte-for-byte, so an input without a trailing newline would
# have its last line fused with the first line of the next file.
# NOTE(review): no Radar list is named in the command below — confirm whether
# an input is missing here or the description above is stale.
sort -u "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" \
  > "top-1m-well-known.txt"