Merge pull request #126 from adren/patch-6

deduplicate list of user-agents
This commit is contained in:
Bunkerity 2021-05-02 15:14:10 +02:00 committed by GitHub
commit 10dc58cb6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 3 additions and 9 deletions

View File

@ -15,18 +15,12 @@ elif [ -S /tmp/autoconf.sock ] ; then
fi
# generate new conf
BLACKLIST="$(curl -s https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list)
$(curl -s https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.txt)"
IFS= BLACKLIST="$(curl -s https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list;
curl -s https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.txt | sort -u)"
if [ "$?" -ne 0 ] ; then
job_log "[BLACKLIST] can't update user-agent list"
fi
DATA=""
IFS=$'\n'
for ua in $BLACKLIST ; do
DATA="${DATA}~*${ua} yes;\n"
done
DATA_ESCAPED=$(echo "$DATA" | sed 's: :\\\\ :g' | sed 's:\\\\ yes;: yes;:g' | sed 's:\\\\\\ :\\\\ :g')
echo -e "map \$http_user_agent \$bad_user_agent { default no; $DATA_ESCAPED }" > /tmp/map-user-agent.conf
echo -e "map \$http_user_agent \$bad_user_agent { default no; $(echo $BLACKLIST | sed 's:\([^\\]\) :\1\\\\ :;s:^:~*:;s:$: yes;:') }" > /tmp/map-user-agent.conf
# check number of lines
lines="$(wc -l /tmp/map-user-agent.conf | cut -d ' ' -f 1)"