forked from Ag/disspam
initial commit
This commit is contained in:
commit
9c0cc51be2
|
@ -0,0 +1,3 @@
|
|||
*.db
|
||||
*.html
|
||||
*.json
|
|
@ -0,0 +1,56 @@
|
|||
# disspam
|
||||
|
||||
## Scripts
|
||||
|
||||
### discrawl.py
|
||||
|
||||
Crawls the explore tab to list public repos.
|
||||
|
||||
### disimport.py
|
||||
|
||||
Parses the html downloaded by discrawl.py and saves it in an sqlite database.
|
||||
|
||||
### disserve.py
|
||||
|
||||
Serves a web interface for tagging repos as good or bad. This uses the
|
||||
`disrepos.db` database file created by disimport.py.
|
||||
|
||||
Run: `flask --app disserve --debug run` (tested with Flask 2.2.2)
|
||||
|
||||
Display:
|
||||
|
||||
Repos that are marked good or bad have their first column change the background
|
||||
color to green or red.
|
||||
|
||||
Repo owners with at least one bad repo are shown with a red background color on
|
||||
all of their repos. Note that this isn't immediately applied after marking one
|
||||
of their repos; the table has to be updated first.
|
||||
|
||||
The currently selected row has a yellow background color. The selection is used
|
||||
for keyboard shortcuts that operate on a row.
|
||||
|
||||
The row currently being hovered with the mouse has a light pink background color
|
||||
to make the table easier to read.
|
||||
|
||||
Controls:
|
||||
|
||||
- W: Open repo owner in new tab
|
||||
- E: Open repo itself in new tab
|
||||
- A: Mark as bad
|
||||
- S: Remove mark
|
||||
- D: Mark as good
|
||||
- Up/Down: Select previous/next row
|
||||
- Click: Select clicked row
|
||||
|
||||
The "Filter" box takes an expression to filter the table of shown repos.
|
||||
Expressions can use the columns shown in the table. For example,
|
||||
`!stars && !forks && desc` would show only repos with no forks or stars and a
|
||||
non-empty description.
|
||||
|
||||
To match repos marked a specific way, use `flag === "notSpam"` for good repos,
|
||||
`flag === "spam"` for bad repos and `!flag` for unmarked repos.
|
||||
|
||||
Feature wishlist:
|
||||
|
||||
- Multiple selection
|
||||
- Hide mirrors
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import requests
|
||||
import contextlib
|
||||
from typing import Dict
|
||||
|
||||
# Number of explore pages to download (hard-coded; pages are 1-based).
LAST_PAGE = 245

# Seconds to wait for the server before giving up on a request, so that a
# stalled connection cannot hang the crawl forever.
REQUEST_TIMEOUT = 60

with contextlib.closing(requests.Session()) as session:
    for page in range(1, LAST_PAGE + 1):
        file = f'disroot_repos_{page}.html'
        hfile = f'disroot_repos_{page}_head.json'

        # Pages that are already on disk are skipped, which makes it possible
        # to re-run the script after an interruption.
        if os.path.exists(file):
            continue

        print(f'get page {page}')
        url = f'https://git.disroot.org/explore/repos?page={page}&sort=oldest&q=&topic=false&language=&only_show_relevant=false'
        response = session.get(url, timeout=REQUEST_TIMEOUT)

        # The status and headers are written before the status check, so they
        # are kept on disk for failed requests as well.
        with open(hfile, 'w') as f:
            json.dump({
                'url': url,
                'status': response.status_code,
                'headers': list(response.headers.items()),
            }, f)

        try:
            response.raise_for_status()
        except requests.HTTPError:
            # Only HTTP error statuses are reported here; anything else
            # (e.g. Ctrl+C) propagates untouched instead of being mislabelled
            # as a fetch error.
            print(f'Error fetching URL "{url}"!')
            print(f'  Status: {response.status_code}')
            print(f'  Headers:')
            for k, v in response.headers.items():
                print(f'    {k}: {v}')
            raise

        with open(file, 'w') as f:
            f.write(response.text)

        # Pause between requests.
        time.sleep(2)
|
|
@ -0,0 +1,76 @@
|
|||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import sqlite3
|
||||
import requests
|
||||
import contextlib
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Dict
|
||||
|
||||
# Index of the last downloaded explore page to import (hard-coded). It is the
# only page that is allowed to list fewer than PAGE_SIZE repos.
LAST_PAGE = 244
PAGE_SIZE = 20

with (
    contextlib.closing(sqlite3.connect('disrepos.db', isolation_level=None)) as db,
    contextlib.closing(db.cursor()) as cursor,
):
    # The table is rebuilt from scratch on every run; whatever an existing
    # `repos` table contained (including flags) is discarded. IF EXISTS makes
    # the first run work without hiding unrelated database errors.
    cursor.execute('''
        DROP TABLE IF EXISTS `repos`
    ''')
    cursor.execute('''
        CREATE TABLE `repos` (
            `owner` TEXT NOT NULL CHECK(`owner`<>''),
            `name` TEXT NOT NULL CHECK(`name`<>''),
            `stars` INTEGER NOT NULL CHECK(`stars`>=0),
            `forks` INTEGER NOT NULL CHECK(`forks`>=0),
            `desc` TEXT CHECK(`desc`<>''),
            `lang` TEXT CHECK(`lang`<>''),
            `flag` TEXT CHECK(`flag`<>'')
        ) STRICT
    ''')

    for page in range(1, LAST_PAGE + 1):
        last = (page == LAST_PAGE)

        with open(f'disroot_repos_{page}.html', 'r') as f:
            doc_text = f.read()

        bs = BeautifulSoup(doc_text, 'html.parser')

        # Each page is expected to contain exactly one repo list.
        flex_lists = bs.select('.flex-list')
        assert len(flex_lists) == 1

        flex_items = flex_lists[0].select(':scope > .flex-item')
        if last:
            assert flex_items and len(flex_items) <= PAGE_SIZE
        else:
            assert len(flex_items) == PAGE_SIZE

        for flex_item in flex_items:
            flex_item_mains = flex_item.select(':scope > .flex-item-main')
            assert len(flex_item_mains) == 1

            row = flex_item_mains[0]

            url = row.select(':scope > .flex-item-header > .flex-item-title > a')[0]['href']
            desc_el = row.select(':scope > .flex-item-body')
            # The description is only taken when there are exactly two body
            # elements; presumably a lone body element is something other
            # than the description -- TODO confirm against the page markup.
            # The description is stored as inner HTML, not as plain text.
            desc = desc_el[0].decode_contents().strip() if len(desc_el) == 2 else None
            if not desc:  # no empty strings in the db
                desc = None
            lang_el = row.select(':scope > .flex-item-header > .flex-item-trailing > a.muted[href*="&language="]')
            lang = lang_el[0].text.strip() if lang_el else None
            stars = int(row.select(':scope > .flex-item-header > .flex-item-trailing > a.text.grey.flex-text-inline[href$="/stars"]')[0].text)
            forks = int(row.select(':scope > .flex-item-header > .flex-item-trailing > a.text.grey.flex-text-inline[href$="/forks"]')[0].text)

            # The href has the form "/<owner>/<name>"; drop the empty part in
            # front of the leading slash.
            url_parts = url.split('/')[1:]

            owner = url_parts[0]
            name = url_parts[1]
            print(owner, name, stars, forks, desc, lang)
            cursor.execute('''
                INSERT INTO `repos` (`owner`, `name`, `stars`, `forks`, `desc`, `lang`)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (owner, name, stars, forks, desc, lang))
|
|
@ -0,0 +1,108 @@
|
|||
|
||||
import json
|
||||
import sqlite3
|
||||
import contextlib
|
||||
from flask import Flask, Response, request
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/main.js')
def main_js():
    """Serve the contents of ``disstuff.js`` as the page's script."""
    with open('disstuff.js', 'r') as script_file:
        script_source = script_file.read()
    return Response(script_source, mimetype='application/javascript')
|
||||
|
||||
@app.route('/repos.json')
def repos():
    """Return every row of the `repos` table as a JSON array of objects.

    Each object has the keys rowid, owner, name, stars, forks, desc, lang and
    flag; rows are ordered by ascending rowid.
    """
    keys = ('rowid', 'owner', 'name', 'stars', 'forks', 'desc', 'lang', 'flag')
    with (
        contextlib.closing(sqlite3.connect('disrepos.db', isolation_level=None)) as db,
        contextlib.closing(db.cursor()) as cursor,
    ):
        # The column order of the SELECT must match `keys`.
        cursor.execute('''
            SELECT `rowid`, `owner`, `name`, `stars`, `forks`, `desc`, `lang`, `flag`
            FROM `repos`
            ORDER BY `rowid` ASC
        ''')
        return Response(
            json.dumps([dict(zip(keys, row)) for row in cursor]),
            # application/json is the registered media type for JSON.
            mimetype='application/json',
        )
|
||||
|
||||
@app.route('/badusers.json')
def badusers():
    """Return a JSON array with the owners of at least one repo flagged 'spam'."""
    with (
        contextlib.closing(sqlite3.connect('disrepos.db', isolation_level=None)) as db,
        contextlib.closing(db.cursor()) as cursor,
    ):
        # Filtering the rows directly yields the same set of owners as
        # looking them up through an IN (...) subquery on the same table.
        cursor.execute('''
            SELECT DISTINCT `owner`
            FROM `repos`
            WHERE `flag`='spam'
        ''')
        return Response(
            json.dumps([row[0] for row in cursor]),
            # application/json is the registered media type for JSON.
            mimetype='application/json',
        )
|
||||
|
||||
@app.route('/flag', methods=['POST'])
def flag():
    """Set or clear the flag of a single repo.

    Form fields:
        rowid: integer rowid of the row in `repos` to update.
        flag: the new flag value; an empty string clears the flag.

    Returns an empty 200 response on success, a 400 response when `rowid` is
    not an integer and a 404 response when no row has that rowid.
    """
    try:
        rowid = int(request.form['rowid'])
    except ValueError:
        return Response('rowid must be an integer', status=400, mimetype='text/plain')

    # An empty string means "no flag" and is stored as NULL.
    new_flag = request.form['flag'] or None

    with (
        contextlib.closing(sqlite3.connect('disrepos.db', isolation_level=None)) as db,
        contextlib.closing(db.cursor()) as cursor,
    ):
        cursor.execute('''
            UPDATE `repos`
            SET `flag`=?
            WHERE `rowid`=?
        ''', (new_flag, rowid))
        # Report an unknown rowid to the client explicitly rather than through
        # an assertion, which would surface as a 500 and is stripped under -O.
        if cursor.rowcount != 1:
            return Response('no repo with that rowid', status=404, mimetype='text/plain')
    return ''
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Return the single HTML page of the tagging interface.

    The page holds the filter form, a list of suggested filter expressions and
    an empty `#main` container; the table itself is built by `/main.js`.
    """
    # Raw string: the suggested filters contain regex backslashes that must
    # reach the browser unchanged. Double quotes inside the `value` attributes
    # are written as &quot; so that they do not terminate the attribute.
    return r'''
<!doctype html>
<style>
html {
    font-size: smaller;
    background: gainsboro;
}
form {
    display: inline-block;
}
th {
    position: sticky;
    top: 0;
    background: gainsboro; /* sorry dark theme users */
}
tr:hover { background-color: mistyrose; }
tr[data-active-row] {
    background-color: yellow;
}
[data-flag=spam] td:first-of-type { background: red; }
td[data-bad-user] { background: red; }
[data-flag=notSpam] td:first-of-type { background: green; }
</style>
Filter: <form onsubmit="update(); return false"><input list="filters" type="text" id="filter" style="width: 400px;"> <input type="submit" value="Update"></form> <span id="filterstatus"></span>
<datalist id="filters">
<option value="!stars&&!forks&&!lang&&desc"></option>
<option value="desc&&!lang"></option>
<option value="desc&&/viagra/i.test(desc)"></option>
<option value="desc&&/<a href=&quot;(?![^&quot;]*(github\.com|\bgit\.))/i.test(desc)"></option>
<option value="desc&&/buy|cashapp|erectile|escort|essay|support|viagra|wholesale/i.test(desc)"></option>
</datalist>
<hr>
<div id="main"></div>
<script src="/main.js"></script>
'''
|
|
@ -0,0 +1,207 @@
|
|||
|
||||
/**
 * Re-fetches the bad-owner list and the repo list, then rebuilds the table
 * inside `main`, applying the expression from the "Filter" box if it is not
 * empty. The first visible row becomes the active row.
 *
 * Uses the page elements with the ids `filterstatus`, `filter` and `main`
 * through their global names.
 *
 * @returns {Promise<void>} resolves once the table has been rebuilt, or once a
 *     load failure has been written to the status line.
 */
var update = async function ()
{
    filterstatus.textContent = "Working...";

    let badUserNames;
    let repos;
    try
    {
        // The two requests do not depend on each other.
        const [badUsersResponse, reposResponse] = await Promise.all([
            fetch("badusers.json"),
            fetch("repos.json"),
        ]);
        for (const response of [badUsersResponse, reposResponse])
        {
            if (!response.ok)
                throw new Error(`Request failed with status ${response.status}`);
        }
        badUserNames = new Set(await badUsersResponse.json());
        repos = await reposResponse.json();
    }
    catch (e)
    {
        // Do not leave the status line stuck at "Working...".
        filterstatus.textContent = `Failed to load repos: ${e}`;
        return;
    }

    main.innerHTML = "";

    // NOTE(review): the filter box is evaluated as JavaScript (eval inside
    // `with`) so that expressions can name the row's columns directly. Only
    // text typed by the person using the page is executed; the row values are
    // data in scope, not code.
    let filterFn = null;
    if (filter.value.trim())
    {
        filterFn = Function("with (arguments[1]) return eval(arguments[0]);").bind(null, filter.value);
    }

    // Builds a <td> showing plain text, optionally right-aligned.
    const makeTextCell = function (text, alignRight)
    {
        const td = document.createElement("td");
        td.textContent = text;
        if (alignRight)
            td.setAttribute("align", "right");
        return td;
    };

    // Builds a <td> holding a link that opens in a new tab.
    const makeLinkCell = function (className, text, href)
    {
        const td = document.createElement("td");
        const link = document.createElement("a");
        link.className = className;
        link.textContent = text;
        link.href = href;
        link.target = "_blank";
        td.appendChild(link);
        return td;
    };

    const table = document.createElement("table");
    table.setAttribute("border", "");
    const thead = document.createElement("thead");
    thead.innerHTML = `
        <th>rowid</th>
        <th>owner</th>
        <th>name</th>
        <th>desc</th>
        <th>stars</th>
        <th>forks</th>
        <th>lang</th>
    `;
    table.appendChild(thead);
    const tbody = document.createElement("tbody");
    table.appendChild(tbody);

    let numFilteredOut = 0;
    let filterError = null;
    let firstRow = null;
    for (const repo of repos)
    {
        if (filterFn)
        {
            let keep;
            try
            {
                keep = filterFn(repo);
            }
            catch (e)
            {
                // A broken expression (e.g. a syntax error) fails on every
                // row, so report it once and stop.
                filterError = e;
                alert(e);
                break;
            }
            if (!keep)
            {
                numFilteredOut++;
                continue;
            }
        }

        const tr = document.createElement("tr");
        tr.setAttribute("class", "table-row");
        if (repo["flag"])
            tr.setAttribute("data-flag", repo["flag"]);
        tr.setAttribute("data-rowid", repo["rowid"]);

        tr.appendChild(makeTextCell(repo["rowid"], true));

        const tdOwner = makeLinkCell("owner-link", repo["owner"], `https://git.disroot.org/${repo["owner"]}`);
        if (badUserNames.has(repo["owner"]))
            tdOwner.dataset.badUser = true;
        tr.appendChild(tdOwner);

        tr.appendChild(makeLinkCell("repo-link", repo["name"], `https://git.disroot.org/${repo["owner"]}/${repo["name"]}`));

        // NOTE(review): the description is inserted as HTML, so markup coming
        // from the crawled pages is rendered as-is. Switch to textContent or
        // sanitize if that source is not trusted.
        const tdDesc = document.createElement("td");
        tdDesc.innerHTML = repo["desc"];
        tr.appendChild(tdDesc);

        tr.appendChild(makeTextCell(repo["stars"], true));
        tr.appendChild(makeTextCell(repo["forks"], true));
        tr.appendChild(makeTextCell(repo["lang"], false));

        tbody.appendChild(tr);
        if (!firstRow)
            firstRow = tr;
    }

    // There may be no rows at all (empty list, or everything filtered out).
    if (firstRow)
        firstRow.dataset.activeRow = true;
    main.appendChild(table);

    if (filterError)
    {
        // The loop stopped early, so the counts would be misleading.
        filterstatus.textContent = `Filter failed: ${filterError}`;
    }
    else
    {
        filterstatus.textContent = `Showing ${repos.length-numFilteredOut} of ${repos.length} results (${numFilteredOut} filtered out)`;
    }
};
|
||||
|
||||
// Build the table once when this script is first evaluated.
update();
|
||||
|
||||
/**
 * Stores a new flag for the currently active row on the server and, on
 * success, mirrors it in the row's `data-flag` attribute. Does nothing when
 * no row is active. Failures are reported with `alert`.
 *
 * @param {string} flag - the flag to store; an empty string removes the flag.
 * @returns {Promise<void>}
 */
var setFlag = async function (flag)
{
    const activeRow = document.querySelector("[data-active-row]");
    if (!activeRow)
        return;

    const rowid = activeRow.dataset.rowid;
    try
    {
        const response = await fetch("/flag", {
            method: "POST",
            body: `rowid=${encodeURIComponent(rowid)}&flag=${encodeURIComponent(flag)}`,
            headers: {"Content-Type": "application/x-www-form-urlencoded"},
        });
        if (response.status !== 200)
            throw new Error(`Request failed with status ${response.status}`);

        // An unflagged row carries no data-flag attribute at all.
        if (flag)
            activeRow.dataset.flag = flag;
        else
            delete activeRow.dataset.flag;
    }
    catch (e)
    {
        alert(e);
    }
};
|
||||
|
||||
/**
 * Keyboard shortcuts acting on the active row:
 * ArrowUp/ArrowDown move the selection, "a"/"s"/"d" set the flag to
 * "spam"/none/"notSpam", "w" opens the owner link and "e" the repo link.
 * Keys pressed together with a modifier, or while an <input> has focus, are
 * left alone.
 */
document.onkeydown = function (event)
{
    // Leave browser/OS shortcuts such as Ctrl+A or Cmd+W untouched.
    if (event.ctrlKey || event.altKey || event.shiftKey || event.metaKey)
        return;

    // Typing in the filter box must not trigger shortcuts.
    if (event.target.matches("input"))
        return;

    const activeRow = document.querySelector("[data-active-row]");

    // Clicks the link with the given class inside the active row, if any.
    const openLink = function (selector)
    {
        const link = activeRow && activeRow.querySelector(selector);
        if (link)
            link.click();
    };

    switch (event.key)
    {
        case "ArrowDown":
        case "ArrowUp":
        {
            const neighbor = activeRow && (event.key === "ArrowDown"
                ? activeRow.nextElementSibling
                : activeRow.previousElementSibling);
            if (neighbor)
            {
                delete activeRow.dataset.activeRow;
                neighbor.dataset.activeRow = true;
                // Only swallow the key when the selection actually moved.
                event.preventDefault();
                // Keep the newly selected row in view.
                neighbor.scrollIntoView({block: "center"});
            }
            break;
        }
        case "a":
            setFlag("spam");
            break;
        case "s":
            setFlag("");
            break;
        case "d":
            setFlag("notSpam");
            break;
        case "w":
            openLink(".owner-link");
            break;
        case "e":
            openLink(".repo-link");
            break;
        default:
            return;
    }
};
|
||||
|
||||
/**
 * Makes the clicked table row the active row. Only rows inside a <tbody>
 * count, so clicking the header row does not select it (it has no
 * `data-rowid` for the row shortcuts to work with).
 */
document.onclick = function (event)
{
    const clickedRow = event.target.closest("tbody tr");
    if (!clickedRow)
        return;

    const previousRow = document.querySelector("[data-active-row]");
    if (previousRow)
        delete previousRow.dataset.activeRow;
    clickedRow.dataset.activeRow = true;
};
|
Loading…
Reference in New Issue