Fix classifiers and speed up
This commit is contained in:
parent
b8f160bf93
commit
4fad29c901
|
@ -16,5 +16,4 @@ To insert the metadata to the `cheese_shop` database, open a shell and run
|
|||
pip install -r tools/requirements.txt
|
||||
python tools/make-cheeses.py
|
||||
|
||||
The script requires Python 3.6 or above. Due to dummy handling of classifiers,
|
||||
`sql/def.sql` needs to be reloaded before each run.
|
||||
The script runs on Python 3.6 and above.
|
||||
|
|
14
sql/def.sql
14
sql/def.sql
|
@ -2,22 +2,18 @@ DROP DATABASE IF EXISTS cheese_shop;
|
|||
CREATE DATABASE cheese_shop;
|
||||
USE cheese_shop;
|
||||
|
||||
CREATE TABLE releases (
|
||||
id smallint AUTO_INCREMENT PRIMARY KEY,
|
||||
project varchar(32),
|
||||
version varchar(32),
|
||||
CONSTRAINT integrity UNIQUE (project, version));
|
||||
|
||||
CREATE TABLE contacts (
|
||||
email varchar(255) PRIMARY KEY,
|
||||
name varchar(255));
|
||||
|
||||
CREATE TABLE information (
|
||||
release_id smallint PRIMARY KEY,
|
||||
CREATE TABLE releases (
|
||||
id smallint AUTO_INCREMENT PRIMARY KEY,
|
||||
project varchar(32),
|
||||
version varchar(32),
|
||||
summary varchar(255),
|
||||
homepage varchar(2083),
|
||||
email varchar(255),
|
||||
FOREIGN KEY (release_id) REFERENCES releases(id),
|
||||
CONSTRAINT integrity UNIQUE (project, version),
|
||||
FOREIGN KEY (email) REFERENCES contacts(email));
|
||||
|
||||
CREATE TABLE troves (
|
||||
|
|
|
@ -21,10 +21,10 @@ class CheeseMaker:
|
|||
self.connection = connect(user=USER, database=DB)
|
||||
self.connection.autocommit = True
|
||||
self.cursor = self.connection.cursor()
|
||||
self.classifiers = {}
|
||||
for classifier in classifiers:
|
||||
self.insert('troves', classifier=classifier)
|
||||
self.classifiers[classifier] = self.cursor.lastrowid
|
||||
self.cursor.execute('SELECT classifier, id FROM troves')
|
||||
self.classifiers = dict(self.cursor.fetchall())
|
||||
|
||||
def start_soon(self, async_fn: Awaitable, *args: Any) -> None:
|
||||
"""Creates a child task, scheduling await async_fn(*args)."""
|
||||
|
@ -37,20 +37,25 @@ class CheeseMaker:
|
|||
f'INSERT IGNORE INTO {table} ({", ".join(items)}) '
|
||||
f'VALUES ({", ".join(map(repr, items.values()))})')
|
||||
|
||||
def insert_info(self, release_id: int, info: Dict[str, Any]) -> None:
|
||||
"""Insert auxiliary information of the given release."""
|
||||
def insert_info(self, info: Dict[str, Any]) -> int:
|
||||
"""Insert auxiliary information of the given release.
|
||||
|
||||
Return the release ID.
|
||||
"""
|
||||
self.insert('contacts', name=info['author'],
|
||||
email=info['author_email'])
|
||||
self.insert('information', release_id=release_id,
|
||||
self.insert('releases', project=info['name'], version=info['version'],
|
||||
summary=info['summary'], homepage=info['home_page'],
|
||||
email=info['author_email'])
|
||||
for classifier in info['classifiers']:
|
||||
release_id = self.cursor.lastrowid
|
||||
for classifier in (info['classifiers'] or []):
|
||||
self.insert('classifiers', release_id=release_id,
|
||||
trove_id=self.classifiers[classifier])
|
||||
for keyword in (info['keywords'] or '').split(','):
|
||||
self.insert('keywords', release_id=release_id, term=keyword)
|
||||
for dep in (info['requires_dist'] or []):
|
||||
self.insert('dependencies', release_id=release_id, dependency=dep)
|
||||
return release_id
|
||||
|
||||
def insert_dist(self, release_id: int,
|
||||
distributions: List[Dict[str, Any]]) -> None:
|
||||
|
@ -82,13 +87,12 @@ class CheeseMaker:
|
|||
except JSONDecodeError:
|
||||
return
|
||||
print('Processing', project_name, version)
|
||||
self.insert('releases', project=project_name, version=version)
|
||||
release_id = self.cursor.lastrowid
|
||||
self.insert_info(release_id, content['info'])
|
||||
release_id = self.insert_info(content['info'])
|
||||
self.insert_dist(release_id, content['urls'])
|
||||
|
||||
async def coagulate(self, project_name: str) -> None:
|
||||
"""Fetch project's available versions and metadata."""
|
||||
if project_name == 'ccxt': return # ccxt has way too many releases
|
||||
content = await self.json(f'/pypi/{project_name}/json')
|
||||
print('Fetching', project_name)
|
||||
for version in content['releases'].keys():
|
||||
|
|
Loading…
Reference in New Issue