forked from Ag/disspam
44 lines
1.2 KiB
Python
44 lines
1.2 KiB
Python
|
|
import os
|
|
import json
|
|
import time
|
|
import requests
|
|
import contextlib
|
|
from typing import Dict
|
|
|
|
# Download pages 1 through 245 of the git.disroot.org "explore repos" listing
# (sorted oldest first) into the current working directory.
#
# For each page two files are written:
#   disroot_repos_<page>_head.json  - request URL, HTTP status and headers
#   disroot_repos_<page>.html       - the response body
#
# The script is resumable: a page whose HTML file already exists is skipped.
# Any HTTP error status aborts the run after printing diagnostics.
with contextlib.closing(requests.Session()) as session:
    for page in range(1, 245 + 1):
        file = f'disroot_repos_{page}.html'
        hfile = f'disroot_repos_{page}_head.json'

        # The HTML file is only written after a successful status check, so
        # its presence marks the page as already fetched.
        if os.path.exists(file):
            continue

        print(f'get page {page}')
        url = f'https://git.disroot.org/explore/repos?page={page}&sort=oldest&q=&topic=false&language=&only_show_relevant=false'
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection fails the run instead of hanging it.
        response = session.get(url, timeout=30)

        # Save the response metadata before checking the status so that
        # failed requests leave a record on disk as well.
        with open(hfile, 'w', encoding='utf-8') as f:
            f.write(json.dumps({
                'url': url,
                'status': response.status_code,
                'headers': list(response.headers.items()),
            }))

        try:
            response.raise_for_status()
        except requests.HTTPError:
            # Only HTTPError is raised by raise_for_status(); catching it
            # specifically keeps Ctrl-C and interpreter exit from being
            # reported as a fetch failure.
            print(f'Error fetching URL "{url}"!')
            print(f' Status: {response.status_code}')
            print(' Headers:')
            for k, v in response.headers.items():
                print(f' {k}: {v}')
            raise

        # response.text is already decoded; write it with a fixed encoding so
        # the result does not depend on the platform's locale.
        with open(file, 'w', encoding='utf-8') as f:
            f.write(response.text)

        # Pause between requests to avoid hammering the server.
        time.sleep(2)
|