"""Download the git.disroot.org "explore repos" listing, one HTML file per page.

For every page number in ``FIRST_PAGE..LAST_PAGE`` the script writes two files
into the current working directory:

* ``disroot_repos_<page>_head.json`` -- the request URL, HTTP status code and
  response headers (written even when the request fails with an HTTP error);
* ``disroot_repos_<page>.html`` -- the response body (only on success).

A page whose HTML file already exists is skipped, so re-running the script
resumes an interrupted download.  The first HTTP error aborts the run after
printing the status code and headers.
"""
import contextlib
import json
import os
import time
from typing import Dict

import requests

FIRST_PAGE = 1
LAST_PAGE = 245
# Pause after each downloaded page, in seconds.
DELAY_SECONDS = 2
# requests applies no timeout by default; without one a stalled connection
# would block the script forever.
REQUEST_TIMEOUT_SECONDS = 30
URL_TEMPLATE = (
    'https://git.disroot.org/explore/repos?page={page}&sort=oldest&q='
    '&topic=false&language=&only_show_relevant=false'
)

with contextlib.closing(requests.Session()) as session:
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        html_path = f'disroot_repos_{page}.html'
        head_path = f'disroot_repos_{page}_head.json'

        # The HTML file doubles as the "page done" marker for resuming.
        if os.path.exists(html_path):
            continue

        print(f'get page {page}')
        url = URL_TEMPLATE.format(page=page)
        response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

        # Record the response metadata before checking the status so that a
        # failed request still leaves a trace on disk.
        with open(head_path, 'w', encoding='utf-8') as f:
            json.dump({
                'url': url,
                'status': response.status_code,
                'headers': list(response.headers.items()),
            }, f)

        try:
            response.raise_for_status()
        except requests.HTTPError:
            # Only HTTP errors are reported here; KeyboardInterrupt and
            # friends propagate without the misleading diagnostic.
            print(f'Error fetching URL "{url}"!')
            print(f' Status: {response.status_code}')
            print(' Headers:')
            for k, v in response.headers.items():
                print(f' {k}: {v}')
            raise

        # Write to a temporary name and rename afterwards: an interrupted or
        # failed write must not leave a truncated HTML file behind, because
        # its mere existence makes later runs skip the page.
        partial_path = f'{html_path}.part'
        with open(partial_path, 'w', encoding='utf-8') as f:
            f.write(response.text)
        os.replace(partial_path, html_path)

        time.sleep(DELAY_SECONDS)