Complete cheesemaking script

This commit is contained in:
Nguyễn Gia Phong 2020-07-04 23:05:06 +07:00
parent 3fcc8dd998
commit b8f160bf93
5 changed files with 94 additions and 18 deletions

View File

@ -1,2 +1,20 @@
# cheese-shop
Well, it's certainly uncontaminated by cheese.
## Setup
In MySQL, first load the database definition:
SOURCE sql/def.sql;
Then, create a user named `wensleydale` with appropriate permissions
to be used by `mysql.connector`:
SOURCE sql/ctl.sql;
To insert the metadata into the `cheese_shop` database, open a shell and run
pip install -r tools/requirements.txt
python tools/make-cheeses.py
The script requires Python 3.6 or above. Because it naively re-inserts every
trove classifier on start-up, `sql/def.sql` needs to be reloaded before each run.

View File

@ -1,12 +1,12 @@
USE mysql;
DROP user IF EXISTS wensleydale@localhost;
DROP USER IF EXISTS wensleydale@localhost;
CREATE USER wensleydale@localhost
IDENTIFIED BY '';
GRANT ALL PRIVILEGES ON cheese_shop.*
TO 'wensleydale'@'localhost';
TO wensleydale@localhost;
UPDATE user SET plugin='mysql_native_password' WHERE User='wensleydale';
FLUSH PRIVILEGES;

View File

@ -5,7 +5,8 @@ USE cheese_shop;
CREATE TABLE releases (
id smallint AUTO_INCREMENT PRIMARY KEY,
project varchar(32),
version varchar(32));
version varchar(32),
CONSTRAINT integrity UNIQUE (project, version));
CREATE TABLE contacts (
email varchar(255) PRIMARY KEY,
@ -21,7 +22,7 @@ CREATE TABLE information (
CREATE TABLE troves (
id smallint AUTO_INCREMENT PRIMARY KEY,
classifier varchar(255));
classifier varchar(255) UNIQUE);
CREATE TABLE classifiers (
release_id smallint,
@ -36,14 +37,20 @@ CREATE TABLE keywords (
PRIMARY KEY (release_id, term),
FOREIGN KEY (release_id) REFERENCES releases(id));
-- Dependencies of each release, one row per (release, dependency) pair.
-- NOTE(review): a dependency string longer than 64 characters does not fit
-- in this column; confirm the limit against real metadata.
CREATE TABLE dependencies (
release_id smallint,
dependency varchar(64),
PRIMARY KEY (release_id, dependency),
FOREIGN KEY (release_id) REFERENCES releases(id));
CREATE TABLE distributions (
release_id smallint,
filename varchar(255),
size int,
url varchar(255),
dist_type varchar(16),
python_version varchar(8),
requires_python varchar(32),
requires_dist varchar(64),
size int,
sha256 char(64),
md5 char(32),
PRIMARY KEY (release_id, filename),

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python3
from typing import Any, Awaitable, KeysView
from json import JSONDecodeError
from typing import Any, Awaitable, Dict, Iterable, List
from asks.sessions import Session
from mysql.connector import connect
from trio import Nursery, open_nursery, run
from trove_classifiers import classifiers
USER = 'wensleydale'
DB = 'cheese_shop'
@ -12,35 +14,84 @@ CONNECTIONS = 20
class CheeseMaker:
"""Cheese maker for Mr Wensleydale's cheese shop."""
def __init__(self, nursery: Nursery, session: Session) -> None:
self.nursery, self.session = nursery, session
self.cursor = connect(user=USER, database=DB).cursor()
self.connection = connect(user=USER, database=DB)
self.connection.autocommit = True
self.cursor = self.connection.cursor()
self.classifiers = {}
for classifier in classifiers:
self.insert('troves', classifier=classifier)
self.classifiers[classifier] = self.cursor.lastrowid
def start_soon(self, async_fn: Awaitable, *args: Any) -> None:
"""Creates a child task, scheduling await async_fn(*args)."""
self.nursery.start_soon(async_fn, *args)
def insert(self, table: str, **kwargs: str) -> None:
"""Insert items into the given table."""
items = {k: v for k, v in kwargs.items() if v is not None}
self.cursor.execute(
f'INSERT IGNORE INTO {table} ({", ".join(items)}) '
f'VALUES ({", ".join(map(repr, items.values()))})')
def insert_info(self, release_id: int, info: Dict[str, Any]) -> None:
"""Insert auxiliary information of the given release."""
self.insert('contacts', name=info['author'],
email=info['author_email'])
self.insert('information', release_id=release_id,
summary=info['summary'], homepage=info['home_page'],
email=info['author_email'])
for classifier in info['classifiers']:
self.insert('classifiers', release_id=release_id,
trove_id=self.classifiers[classifier])
for keyword in (info['keywords'] or '').split(','):
self.insert('keywords', release_id=release_id, term=keyword)
for dep in (info['requires_dist'] or []):
self.insert('dependencies', release_id=release_id, dependency=dep)
def insert_dist(self, release_id: int,
distributions: List[Dict[str, Any]]) -> None:
"""Insert distribution information of the given release."""
for dist in distributions:
self.insert('distributions', release_id=release_id,
filename=dist['filename'], size=dist['size'],
url=dist['url'], dist_type=dist['packagetype'],
python_version=dist['python_version'],
requires_python=dist['requires_python'],
sha256=dist['digests']['sha256'],
md5=dist['digests']['md5'])
async def json(self, path: str) -> Any:
"""Return the JSON response to the given GET request."""
response = await self.session.get(
path=path, headers={'Accept': 'application/json'})
return response.json()
async def culture(self) -> KeysView[str]:
async def culture(self) -> Iterable[str]:
"""Return the 100 most popular cheeses in cheese shop."""
stats = await self.json('/stats')
return stats['top_packages'].keys()
async def drain(self, project_name: str, version: str) -> None:
"""Fetch metadata of the given distribution."""
# XXX: insert the fetched metadata to a database
await self.json(f'/pypi/{project_name}/{version}/json')
print(project_name, version)
try:
content = await self.json(f'/pypi/{project_name}/{version}/json')
except JSONDecodeError:
return
print('Processing', project_name, version)
self.insert('releases', project=project_name, version=version)
release_id = self.cursor.lastrowid
self.insert_info(release_id, content['info'])
self.insert_dist(release_id, content['urls'])
async def coagulate(self, project_name: str) -> None:
"""Fetch project's available versions and metadata."""
response = await self.json(f'/pypi/{project_name}/json')
for version in response['releases'].keys():
content = await self.json(f'/pypi/{project_name}/json')
print('Fetching', project_name)
for version in content['releases'].keys():
# Recklessly filter out prereleases
for n in version.split('.'):
try:
@ -51,13 +102,12 @@ class CheeseMaker:
self.start_soon(self.drain, project_name, version)
async def main(session: Session):
async def main():
"""Make cheeses."""
async with open_nursery() as nursery:
maker = CheeseMaker(nursery, session)
maker = CheeseMaker(nursery, Session(INDEX, connections=CONNECTIONS))
for project_name in await maker.culture():
maker.start_soon(maker.coagulate, project_name)
if __name__ == '__main__':
run(main, Session(INDEX, connections=CONNECTIONS))
if __name__ == '__main__': run(main)

View File

@ -1,3 +1,4 @@
asks
mysql.connector
trio
trove-classifiers