fix: dedup entries & parse .js links only
This commit is contained in:
parent
d880d44655
commit
b0477feea0
5 changed files with 108529 additions and 242610 deletions
117037
dist/tracking-data.txt
vendored
117037
dist/tracking-data.txt
vendored
File diff suppressed because it is too large
Load diff
117032
dist/tracking-filter-vivaldi.txt
vendored
117032
dist/tracking-filter-vivaldi.txt
vendored
File diff suppressed because it is too large
Load diff
117032
dist/tracking-filter.txt
vendored
117032
dist/tracking-filter.txt
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,17 +1,13 @@
|
|||
'use strict'
|
||||
|
||||
const { join } = require('path')
|
||||
const { appendFile, readdir, readFile, writeFile } = require('fs').promises
|
||||
const { appendFile, readdir, readFile, rm } = require('fs').promises
|
||||
const { parse } = JSON
|
||||
|
||||
const f = async () => {
|
||||
// __dirname is src/
|
||||
const outputFile = join(__dirname, '../dist/tracking-data.txt')
|
||||
await writeFile(outputFile, `# Title: Tracking URL
|
||||
# Updated: ${new Date().toUTCString()}
|
||||
# Repo: https://gitlab.com/curben/tracking-filter
|
||||
# License: https://gitlab.com/curben/tracking-filter#license
|
||||
# Source: https://github.com/duckduckgo/tracker-radar\n`)
|
||||
const outputFile = join(__dirname, '../tmp/tracking-data-raw.txt')
|
||||
await rm(outputFile, { force: true })
|
||||
const domains = join(__dirname, '../tmp/tracker-radar/domains')
|
||||
const countries = await readdir(domains)
|
||||
for (const country of countries) {
|
||||
|
@ -21,8 +17,8 @@ const f = async () => {
|
|||
const { resources } = parse(data)
|
||||
const tracking = resources.filter(({ fingerprinting }) => fingerprinting === 1)
|
||||
for (const { rule } of tracking) {
|
||||
const link = rule.replace(/\\/g, '') + '\n'
|
||||
await appendFile(outputFile, link)
|
||||
const link = rule.replace(/\\/g, '').replace(/^www\./g, '')
|
||||
if (link.endsWith('.js')) await appendFile(outputFile, link + '\n')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,20 +7,15 @@ cd "tmp/"
|
|||
|
||||
|
||||
# Prepare datasets
|
||||
# rm -rf "tracker-radar/"
|
||||
# git clone --depth 1 "https://github.com/duckduckgo/tracker-radar" "tracker-radar/"
|
||||
rm -rf "tracker-radar/"
|
||||
git clone --depth 1 "https://github.com/duckduckgo/tracker-radar" "tracker-radar/"
|
||||
|
||||
# Extract tracking links
|
||||
node "../src/script.js"
|
||||
|
||||
# # Extract tracking links
|
||||
# node "../src/script.js"
|
||||
|
||||
|
||||
# # Cleanup
|
||||
# cat "../dist/tracking-data.txt" | \
|
||||
# # Remove comment
|
||||
# sed "/^#/d" | \
|
||||
# # Remove www.
|
||||
# sed "s/^www\.//g" > "tracking-url.txt"
|
||||
# Cleanup
|
||||
cat "tracking-data-raw.txt" | \
|
||||
sort -u > "tracking-url.txt"
|
||||
|
||||
|
||||
CURRENT_TIME="$(date -R -u)"
|
||||
|
@ -32,6 +27,10 @@ FIFTH_LINE="! License: https://gitlab.com/curben/tracking-filter#license"
|
|||
SIXTH_LINE="! Source: https://github.com/duckduckgo/tracker-radar"
|
||||
COMMENT_UBO="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE"
|
||||
|
||||
# Original data
|
||||
cat "tracking-url.txt" | \
|
||||
sed '1 i\'"$COMMENT_UBO"'' | \
|
||||
sed "s/^!/#/g" > "../dist/tracking-data.txt"
|
||||
|
||||
# uBO & Adguard
|
||||
cat "tracking-url.txt" | \
|
||||
|
@ -39,7 +38,6 @@ sed "s/^/||/g" | \
|
|||
sed "s/$/\$all/g" | \
|
||||
sed '1 i\'"$COMMENT_UBO"'' > "../dist/tracking-filter.txt"
|
||||
|
||||
|
||||
# Vivaldi
|
||||
cat "tracking-url.txt" | \
|
||||
sed "s/^/||/g" | \
|
||||
|
|
Loading…
Reference in a new issue