diff --git a/report/erd.jpg b/report/erd.jpg deleted file mode 100644 index 03199cd..0000000 Binary files a/report/erd.jpg and /dev/null differ diff --git a/report/erd.png b/report/erd.png new file mode 100644 index 0000000..6ebfb1c Binary files /dev/null and b/report/erd.png differ diff --git a/report/report.pdf b/report/report.pdf index bafa926..2383b9a 100644 Binary files a/report/report.pdf and b/report/report.pdf differ diff --git a/report/report.tex b/report/report.tex index 14902fb..8365d31 100644 --- a/report/report.tex +++ b/report/report.tex @@ -95,10 +95,10 @@ which supports queris similar to the following: \item \verb|belong_to(name)|: Retrieve a list of projects whose author is \verb|name|. \item \verb|browse(classifier)|: Retrieve a list of (\verb|project|, - \verb|version|) of all releases classified with the given classifier. + \verb|version|) of all releases classified by the given \verb|classifier|. \item \verb|release_data(project, version)|: Retrieve the following metadata matching the given release: project, version, homepage, author, - author's email, summary, keywords, classifiers and dependencies + author's email, summary, keywords, classifiers and dependencies. \item \verb|search_name(pattern)|: Retrieve a list of (\verb|project|, \verb|version|, \verb|summary|) where the project name matches the pattern. \item \verb|search_summary(pattern)|: Retrieve a list of (\verb|project|, @@ -127,7 +127,7 @@ its entity set of data extracted from projects: relate to only one releases, but many distributions could be released in the same releases. 
\end{itemize} -\includegraphics[width=\textwidth]{erd.jpg} +\includegraphics[width=\textwidth]{erd.png} \newpage \subsection{Database Schema} @@ -261,6 +261,10 @@ BEGIN SELECT classifier FROM classifiers, troves WHERE release_id = i AND trove_id = troves.id; + + SELECT dependency + FROM dependencies + WHERE release_id = i; END// DELIMITER ; \end{verbatim} @@ -273,7 +277,7 @@ FROM releases WHERE project LIKE 'py%'; \end{verbatim} -\subsection{Search project name by summary} +\subsection{Project Search by Summary} To retrieve project by summary matching the SQL pattern \verb|%num%| \begin{verbatim} SELECT project, version, summary diff --git a/report/schema.png b/report/schema.png index a8b7782..0e0a3dc 100644 Binary files a/report/schema.png and b/report/schema.png differ diff --git a/slides/erd.png b/slides/erd.png new file mode 120000 index 0000000..a5f5621 --- /dev/null +++ b/slides/erd.png @@ -0,0 +1 @@ +../report/erd.png \ No newline at end of file diff --git a/slides/schema.png b/slides/schema.png new file mode 120000 index 0000000..ba4f05b --- /dev/null +++ b/slides/schema.png @@ -0,0 +1 @@ +../report/schema.png \ No newline at end of file diff --git a/slides/slides.pdf b/slides/slides.pdf index 4d8d812..91be304 100644 Binary files a/slides/slides.pdf and b/slides/slides.pdf differ diff --git a/slides/slides.tex b/slides/slides.tex index af92c9f..30f4c64 100644 --- a/slides/slides.tex +++ b/slides/slides.tex @@ -6,6 +6,7 @@ \usepackage{hyperref} \usepackage{lmodern} \usepackage{siunitx} +\newcommand{\byte}{B} \mode{} \usetheme[hideothersubsections]{Hannover} @@ -27,15 +28,190 @@ \begin{document} \frame{\titlepage} \selectlanguage{english} + +\section{Introduction} \begin{frame}{Contents} \tableofcontents \end{frame} -\section{Introduction} +\begin{frame}{Why?}\Large + \begin{itemize} + \item Python package managers download\\ + whole packages just for metadata + \item Mirroring PyPI is expensive (\SI{6}{\giga\byte}) + \item Middle approach: Mirroring metadata 
+ \end{itemize} +\end{frame} + +\section{User Requirements} \frame{\tableofcontents[currentsection]} +\begin{frame}[fragile]{Tasks} + \begin{itemize} + \item \verb|list_projects()|\\ + List of registered project names. + \item \verb|project_releases(project)|\\ + List of releases for given \verb|project|, ordered by version. + \item \verb|project_release_latest(project)|\\ + Latest release of given \verb|project|. + \item \verb|belong_to(name)|\\ + List of projects whose author is \verb|name|. + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Tasks (cont.)} + \begin{itemize} + \item \verb|browse(classifier)|: List of (\verb|project|, + \verb|version|) of all releases classified by \verb|classifier|. + \item \verb|release_data(project, version)|: Metadata of given release: + project, version, homepage, author, author's email, summary, keywords, + classifiers and dependencies. + \item \verb|search_name(pattern)|: List of (\verb|project|, + \verb|version|, \verb|summary|) where \verb|project| matches \verb|pattern|. + \item \verb|search_summary(pattern)|: List of (\verb|project|, + \verb|version|, \verb|summary|) where \verb|summary| matches \verb|pattern|. 
+ \end{itemize} +\end{frame} + +\section{Data Definition} +\frame{\tableofcontents[currentsection]} +\subsection{Entity Relationship Diagram} +\begin{frame}{Entity Relationship Diagram} + \begin{center} + \includegraphics[width=0.8\textwidth]{erd.png} + \end{center} +\end{frame} + +\subsection{Database Schema} +\begin{frame}{Database Schema} + \begin{center} + \includegraphics[width=\textwidth]{schema.png} + \end{center} +\end{frame} + +\section{Data Query} +\frame{\tableofcontents[currentsection]} +\begin{frame}[fragile]{Project Listing}\huge +\begin{verbatim} +SELECT DISTINCT project +FROM releases; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Project Release Listing}\LARGE +\begin{verbatim} +SELECT version +FROM releases +WHERE project = 'spam' +ORDER BY version; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Project Latest Release}\LARGE +\begin{verbatim} +SELECT version +FROM releases +WHERE project = 'spam' +ORDER BY version DESC +LIMIT 1; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Project Listing by Author (view)}\Large +\begin{verbatim} +CREATE VIEW authorships +AS SELECT name as author, project +FROM contacts NATURAL JOIN releases +GROUP BY author, project; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Project Listing by Author (query)}\Large +\begin{verbatim} +SELECT project +FROM authorships +WHERE author='Monty Python'; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Browse by Classifier}\large +\begin{verbatim} +DELIMITER // +CREATE PROCEDURE browse(class varchar(255)) +BEGIN + SELECT project, version + FROM releases, classifiers + WHERE id = release_id AND trove_id = ( + SELECT id + FROM troves + WHERE classifier = class); +END// +DELIMITER ; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Release Metadata} +\begin{verbatim} +DELIMITER // +CREATE PROCEDURE release_data( + project varchar(32), version varchar(32)) +BEGIN + DECLARE i smallint; + SET i = ( + SELECT id + FROM releases + WHERE 
releases.project = project + AND releases.version = version); + SELECT project, version, homepage, + name as author, email, summary + FROM releases NATURAL JOIN contacts + WHERE id = i; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Release Metadata (cont.)} +\begin{verbatim} + SELECT term as keyword + FROM keywords + WHERE release_id = i; + + SELECT classifier + FROM classifiers, troves + WHERE release_id = i AND trove_id = troves.id; + + SELECT dependency + FROM dependencies + WHERE release_id = i; +END// +DELIMITER ; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Project Search by Name}\Large +\begin{verbatim} +SELECT project, version, summary +FROM releases +WHERE project LIKE 'py%'; +\end{verbatim} +\end{frame} + +\begin{frame}[fragile]{Project Search by Summary}\Large +\begin{verbatim} +SELECT project, version, summary +FROM releases +WHERE summary LIKE '%num%'; +\end{verbatim} +\end{frame} \section{Conclusion} \frame{\tableofcontents[currentsection]} +\begin{frame}{What We Learnt}\Large + \begin{itemize}\Large + \item Relational databases + \item SQL---MySQL in particular + \item Python package metadata format + \end{itemize} +\end{frame} + \begin{frame}{Copying}\Large \begin{center} \includegraphics[width=0.2\textwidth]{CC.png} diff --git a/slides/slides.vrb b/slides/slides.vrb new file mode 100644 index 0000000..40c3450 --- /dev/null +++ b/slides/slides.vrb @@ -0,0 +1,7 @@ +\frametitle{Project Search by Summary} +\Large +\begin{verbatim} +SELECT project, version, summary +FROM releases +WHERE summary LIKE '%num%'; +\end{verbatim} diff --git a/sql/def.sql b/sql/def.sql index e4beaea..4283728 100644 --- a/sql/def.sql +++ b/sql/def.sql @@ -92,5 +92,9 @@ BEGIN SELECT classifier FROM classifiers, troves WHERE release_id = i AND trove_id = troves.id; + + SELECT dependency + FROM dependencies + WHERE release_id = i; END// DELIMITER ;