Fix classifiers and speed up
This commit is contained in:
parent
b8f160bf93
commit
4fad29c901
|
@ -16,5 +16,4 @@ To insert the metadata to the `cheese_shop` database, open a shell and run
|
||||||
pip install -r tools/requirements.txt
|
pip install -r tools/requirements.txt
|
||||||
python tools/make-cheeses.py
|
python tools/make-cheeses.py
|
||||||
|
|
||||||
The script requires Python 3.6 or above. Due to dummy handling of classifiers,
|
The script runs on Python 3.6 and above.
|
||||||
`sql/def.sql` needs to be reloaded before each run.
|
|
||||||
|
|
14
sql/def.sql
14
sql/def.sql
|
@ -2,22 +2,18 @@ DROP DATABASE IF EXISTS cheese_shop;
|
||||||
CREATE DATABASE cheese_shop;
|
CREATE DATABASE cheese_shop;
|
||||||
USE cheese_shop;
|
USE cheese_shop;
|
||||||
|
|
||||||
CREATE TABLE releases (
|
|
||||||
id smallint AUTO_INCREMENT PRIMARY KEY,
|
|
||||||
project varchar(32),
|
|
||||||
version varchar(32),
|
|
||||||
CONSTRAINT integrity UNIQUE (project, version));
|
|
||||||
|
|
||||||
CREATE TABLE contacts (
|
CREATE TABLE contacts (
|
||||||
email varchar(255) PRIMARY KEY,
|
email varchar(255) PRIMARY KEY,
|
||||||
name varchar(255));
|
name varchar(255));
|
||||||
|
|
||||||
CREATE TABLE information (
|
CREATE TABLE releases (
|
||||||
release_id smallint PRIMARY KEY,
|
id smallint AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
project varchar(32),
|
||||||
|
version varchar(32),
|
||||||
summary varchar(255),
|
summary varchar(255),
|
||||||
homepage varchar(2083),
|
homepage varchar(2083),
|
||||||
email varchar(255),
|
email varchar(255),
|
||||||
FOREIGN KEY (release_id) REFERENCES releases(id),
|
CONSTRAINT integrity UNIQUE (project, version),
|
||||||
FOREIGN KEY (email) REFERENCES contacts(email));
|
FOREIGN KEY (email) REFERENCES contacts(email));
|
||||||
|
|
||||||
CREATE TABLE troves (
|
CREATE TABLE troves (
|
||||||
|
|
|
@ -21,10 +21,10 @@ class CheeseMaker:
|
||||||
self.connection = connect(user=USER, database=DB)
|
self.connection = connect(user=USER, database=DB)
|
||||||
self.connection.autocommit = True
|
self.connection.autocommit = True
|
||||||
self.cursor = self.connection.cursor()
|
self.cursor = self.connection.cursor()
|
||||||
self.classifiers = {}
|
|
||||||
for classifier in classifiers:
|
for classifier in classifiers:
|
||||||
self.insert('troves', classifier=classifier)
|
self.insert('troves', classifier=classifier)
|
||||||
self.classifiers[classifier] = self.cursor.lastrowid
|
self.cursor.execute('SELECT classifier, id FROM troves')
|
||||||
|
self.classifiers = dict(self.cursor.fetchall())
|
||||||
|
|
||||||
def start_soon(self, async_fn: Awaitable, *args: Any) -> None:
|
def start_soon(self, async_fn: Awaitable, *args: Any) -> None:
|
||||||
"""Creates a child task, scheduling await async_fn(*args)."""
|
"""Creates a child task, scheduling await async_fn(*args)."""
|
||||||
|
@ -37,20 +37,25 @@ class CheeseMaker:
|
||||||
f'INSERT IGNORE INTO {table} ({", ".join(items)}) '
|
f'INSERT IGNORE INTO {table} ({", ".join(items)}) '
|
||||||
f'VALUES ({", ".join(map(repr, items.values()))})')
|
f'VALUES ({", ".join(map(repr, items.values()))})')
|
||||||
|
|
||||||
def insert_info(self, release_id: int, info: Dict[str, Any]) -> None:
|
def insert_info(self, info: Dict[str, Any]) -> int:
|
||||||
"""Insert auxiliary information of the given release."""
|
"""Insert auxiliary information of the given release.
|
||||||
|
|
||||||
|
Return the release ID.
|
||||||
|
"""
|
||||||
self.insert('contacts', name=info['author'],
|
self.insert('contacts', name=info['author'],
|
||||||
email=info['author_email'])
|
email=info['author_email'])
|
||||||
self.insert('information', release_id=release_id,
|
self.insert('releases', project=info['name'], version=info['version'],
|
||||||
summary=info['summary'], homepage=info['home_page'],
|
summary=info['summary'], homepage=info['home_page'],
|
||||||
email=info['author_email'])
|
email=info['author_email'])
|
||||||
for classifier in info['classifiers']:
|
release_id = self.cursor.lastrowid
|
||||||
|
for classifier in (info['classifiers'] or []):
|
||||||
self.insert('classifiers', release_id=release_id,
|
self.insert('classifiers', release_id=release_id,
|
||||||
trove_id=self.classifiers[classifier])
|
trove_id=self.classifiers[classifier])
|
||||||
for keyword in (info['keywords'] or '').split(','):
|
for keyword in (info['keywords'] or '').split(','):
|
||||||
self.insert('keywords', release_id=release_id, term=keyword)
|
self.insert('keywords', release_id=release_id, term=keyword)
|
||||||
for dep in (info['requires_dist'] or []):
|
for dep in (info['requires_dist'] or []):
|
||||||
self.insert('dependencies', release_id=release_id, dependency=dep)
|
self.insert('dependencies', release_id=release_id, dependency=dep)
|
||||||
|
return release_id
|
||||||
|
|
||||||
def insert_dist(self, release_id: int,
|
def insert_dist(self, release_id: int,
|
||||||
distributions: List[Dict[str, Any]]) -> None:
|
distributions: List[Dict[str, Any]]) -> None:
|
||||||
|
@ -82,13 +87,12 @@ class CheeseMaker:
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
return
|
return
|
||||||
print('Processing', project_name, version)
|
print('Processing', project_name, version)
|
||||||
self.insert('releases', project=project_name, version=version)
|
release_id = self.insert_info(content['info'])
|
||||||
release_id = self.cursor.lastrowid
|
|
||||||
self.insert_info(release_id, content['info'])
|
|
||||||
self.insert_dist(release_id, content['urls'])
|
self.insert_dist(release_id, content['urls'])
|
||||||
|
|
||||||
async def coagulate(self, project_name: str) -> None:
|
async def coagulate(self, project_name: str) -> None:
|
||||||
"""Fetch project's available versions and metadata."""
|
"""Fetch project's available versions and metadata."""
|
||||||
|
if project_name == 'ccxt': return # ccxt has way too many releases
|
||||||
content = await self.json(f'/pypi/{project_name}/json')
|
content = await self.json(f'/pypi/{project_name}/json')
|
||||||
print('Fetching', project_name)
|
print('Fetching', project_name)
|
||||||
for version in content['releases'].keys():
|
for version in content['releases'].keys():
|
||||||
|
|
Loading…
Reference in New Issue