diff --git a/README.md b/README.md index 0d3fac6..a212988 100644 --- a/README.md +++ b/README.md @@ -16,5 +16,4 @@ To insert the metadata to the `cheese_shop` database, open a shell and run pip install -r tools/requirements.txt python tools/make-cheeses.py -The script requires Python 3.6 or above. Due to dummy handling of classifiers, -`sql/def.sql` needs to be reloaded before each run. +The script runs on Python 3.6 and above. diff --git a/sql/def.sql b/sql/def.sql index da61270..56c1735 100644 --- a/sql/def.sql +++ b/sql/def.sql @@ -2,22 +2,18 @@ DROP DATABASE IF EXISTS cheese_shop; CREATE DATABASE cheese_shop; USE cheese_shop; -CREATE TABLE releases ( - id smallint AUTO_INCREMENT PRIMARY KEY, - project varchar(32), - version varchar(32), - CONSTRAINT integrity UNIQUE (project, version)); - CREATE TABLE contacts ( email varchar(255) PRIMARY KEY, name varchar(255)); -CREATE TABLE information ( - release_id smallint PRIMARY KEY, +CREATE TABLE releases ( + id smallint AUTO_INCREMENT PRIMARY KEY, + project varchar(32), + version varchar(32), summary varchar(255), homepage varchar(2083), email varchar(255), - FOREIGN KEY (release_id) REFERENCES releases(id), + CONSTRAINT integrity UNIQUE (project, version), FOREIGN KEY (email) REFERENCES contacts(email)); CREATE TABLE troves ( diff --git a/tools/make-cheeses.py b/tools/make-cheeses.py index 1125157..3048fa1 100755 --- a/tools/make-cheeses.py +++ b/tools/make-cheeses.py @@ -21,10 +21,10 @@ class CheeseMaker: self.connection = connect(user=USER, database=DB) self.connection.autocommit = True self.cursor = self.connection.cursor() - self.classifiers = {} for classifier in classifiers: self.insert('troves', classifier=classifier) - self.classifiers[classifier] = self.cursor.lastrowid + self.cursor.execute('SELECT classifier, id FROM troves') + self.classifiers = dict(self.cursor.fetchall()) def start_soon(self, async_fn: Awaitable, *args: Any) -> None: """Creates a child task, scheduling await async_fn(*args).""" @@ -37,20 +37,25 @@ class CheeseMaker: f'INSERT IGNORE INTO {table} ({", ".join(items)}) ' f'VALUES ({", ".join(map(repr, items.values()))})') - def insert_info(self, release_id: int, info: Dict[str, Any]) -> None: - """Insert auxiliary information of the given release.""" + def insert_info(self, info: Dict[str, Any]) -> int: + """Insert auxiliary information of the given release. + + Return the release ID. + """ self.insert('contacts', name=info['author'], email=info['author_email']) - self.insert('information', release_id=release_id, + self.insert('releases', project=info['name'], version=info['version'], summary=info['summary'], homepage=info['home_page'], email=info['author_email']) - for classifier in info['classifiers']: + release_id = self.cursor.lastrowid + for classifier in (info['classifiers'] or []): self.insert('classifiers', release_id=release_id, trove_id=self.classifiers[classifier]) for keyword in (info['keywords'] or '').split(','): self.insert('keywords', release_id=release_id, term=keyword) for dep in (info['requires_dist'] or []): self.insert('dependencies', release_id=release_id, dependency=dep) + return release_id def insert_dist(self, release_id: int, distributions: List[Dict[str, Any]]) -> None: @@ -82,13 +87,12 @@ class CheeseMaker: except JSONDecodeError: return print('Processing', project_name, version) - self.insert('releases', project=project_name, version=version) - release_id = self.cursor.lastrowid - self.insert_info(release_id, content['info']) + release_id = self.insert_info(content['info']) self.insert_dist(release_id, content['urls']) async def coagulate(self, project_name: str) -> None: """Fetch project's available versions and metadata.""" + if project_name == 'ccxt': return # ccxt has way too many releases content = await self.json(f'/pypi/{project_name}/json') print('Fetching', project_name) for version in content['releases'].keys():