maintenance/hydra/goggles.scm

530 lines
17 KiB
Scheme
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/run/current-system/profile/bin/guile \
--no-auto-compile -e main -s
!#
;; IRC log viewer
;; Copyright © 2019, 2020, 2021, 2022 Ricardo Wurmus <ludo@gnu.org>
;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org>
;; Copyright © 2020 Andreas Enge <andreas@enge.fr>
;; Copyright © 2022 Tobias Geerinckx-Rice <me@tobias.gr>
;; Released under the GNU GPLv3 or any later version.
(use-modules (web http)
(web request)
(web response)
(web server)
(web uri)
(sxml simple)
(srfi srfi-1)
(srfi srfi-26)
(ice-9 ftw)
(ice-9 match)
(ice-9 rdelim)
(ice-9 regex)
(ice-9 textual-ports)
(xapian wrap)
(xapian xapian))
(define %log-root "/var/www/.well-known/all-logs/")
(define %log-xapian-db "/var/cache/logs.xapian/")
(define %config
'((host . "127.0.0.1")
(port . 3333)
(channels . ("guix" "guix-hpc" "guile" "guile-steel" "bootstrappable" "hurd"
"ocapn" "spritely"))))
(define file-mime-types
'(("css" . (text/css))
("js" . (text/javascript))
("png" . (image/png))
("gif" . (image/gif))
("woff" . (application/font-woff))
("ttf" . (application/octet-stream))
("html" . (text/html))))
(define (channel-files channel)
(reverse (scandir (string-append %log-root "/#" channel "/")
(lambda (name)
(not (member name '("." ".." "index.html")))))))
(define* (index-text!* term-generator text #:key (wdf-increment 1) prefix)
(apply TermGenerator-index-text-without-positions
term-generator text wdf-increment
(if prefix (list prefix) '())))
(define (index-channel-logs channel)
"Index all messages in the logs for CHANNEL."
(define new?
(let* ((file (string-append %log-xapian-db "/postlist.glass"))
(reference
(and (file-exists? file)
(and=> (stat file) stat:mtime))))
(lambda (file)
(if reference
(> (and=> (stat file) stat:mtime) reference)
#t))))
(format (current-error-port) "Indexing ~a~%" channel)
(call-with-writable-database %log-xapian-db
(lambda (db)
(for-each
(lambda (file-name)
(define stamp (basename file-name ".log"))
(define (index-message line count)
(match (string-split line #\space)
((time "***" . msg) #f)
((time nick . msg)
(let* ((idterm (string-append "Q" channel stamp (number->string count)))
(doc (make-document
#:data (call-with-output-string
(lambda (port)
(write
`((stamp . ,stamp)
(id . ,(string-filter char-set:digit time))
(text . ,line))
port)))
#:terms `((,idterm . 0))))
(term-generator
(make-term-generator #:stem (make-stem "en")
#:document doc)))
(index-text!* term-generator channel #:prefix "B")
(index-text!* term-generator nick #:prefix "A")
(index-text!* term-generator line)
(replace-document! db idterm doc)))
(_ #f)))
(format (current-error-port)
" Indexing file ~a~%" file-name)
(with-input-from-file file-name
(lambda ()
(let loop ((line (read-line))
(count 0))
(unless (eof-object? line)
(index-message line count)
(loop (read-line) (1+ count)))))))
(filter new?
(map (cut string-append %log-root "/#" channel "/" <>)
(channel-files channel))))))
(format (current-error-port) "Indexing ~a complete!~%" channel))
(define* (parse-query* querystring #:key stemmer stemming-strategy
(prefixes '())
(boolean-prefixes '()))
(let ((queryparser (new-QueryParser)))
(QueryParser-set-stemmer queryparser stemmer)
(when stemming-strategy
(QueryParser-set-stemming-strategy queryparser stemming-strategy))
(for-each (match-lambda
((field . prefix)
(QueryParser-add-prefix queryparser field prefix)))
prefixes)
(for-each (match-lambda
((field . prefix)
(QueryParser-add-boolean-prefix queryparser field prefix)))
boolean-prefixes)
(let ((query (QueryParser-parse-query queryparser querystring)))
(delete-QueryParser queryparser)
query)))
(define* (search querystring #:key (pagesize 100))
(call-with-database %log-xapian-db
(lambda (db)
(let* ((query (parse-query* querystring
#:stemmer (make-stem "en")
#:boolean-prefixes
'(("channel" . "B")
("nick" . "A"))))
(enq (enquire db query)))
(Enquire-set-sort-by-value enq 0 #f)
(reverse (mset-fold (lambda (item acc)
(cons (call-with-input-string
(document-data (mset-item-document item))
read)
acc))
'()
(enquire-mset enq
#:maximum-items pagesize)))))))
(define (render-html sxml)
(list '((content-type . (text/html)))
(lambda (port)
(display "<!DOCTYPE html>" port)
(sxml->xml sxml port))))
(define css
"\
html {
background: #fdfdfd;
}
h1 {
font-weight: 300;
}
h2 {
font-weight: 200;
}
h3 {
padding: .5em 0;
border-top: 3px dotted #ddd;
margin-bottom: 0;
}
form {
width: 400px;
display: flex;
}
input {
width: 100%;
display: flex;
border-radius: .25em 0 0 .25em;
border: 1px solid #aaa;
border-right: 0;
padding: 0.5em;
}
button {
display: flex;
border-radius: 0 .25em .25em 0;
background-color: #007bff;
border: 1px solid #007bff;
padding: .5em;
cursor: pointer;
color: white;
}
button:hover {
background-color: #0069d9;
border-color: #0062cc;
}
a {
color: #007bff;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
h4 {
margin-bottom: .5em;
}
table td {
padding: 0.75em;
}
table tr:hover {
background: #eee;
}
.year {
display: table;
}
.month {
display: table-cell;
padding-right: 1em;
}
ul {
margin: 0;
padding: 0;
list-style: none;
}
.nick {
padding-right: 0.6rem;
font-weight: bold;
text-align: right;
width: 13rem;
display: table-cell;
}
.nick a {
color: inherit;
text-decoration: none;
}
.message {
display: table-cell;
padding-left: 0.6rem;
border-left: 2px solid #333;
}
.notice {
color: #859900;
font-style: italic;
}
.line {
line-height: 1.8rem;
display: table;
}
#logs {
margin-top: 1.5rem;
padding: 1.5rem;
}
")
(define looking-glass
;; This SVG is part of Bootstrap and is available
;; under the Expat license.
'(svg
(@ (class "bi bi-search")
(width "1em")
(height "1em")
(viewBox "0 0 16 16")
(fill "currentColor")
(xmlns "http://www.w3.org/2000/svg"))
(title "Search")
(path (@ (fill-rule "evenodd")
(d "M10.442 10.442a1 1 0 011.415 0l3.85 3.85a1 1 0 01-1.414 1.415l-3.85-3.85a1 1 0 010-1.415z")
(clip-rule "evenodd")) "")
(path (@ (fill-rule "evenodd")
(d "M6.5 12a5.5 5.5 0 100-11 5.5 5.5 0 000 11zM13 6.5a6.5 6.5 0 11-13 0 6.5 6.5 0 0113 0z")
(clip-rule "evenodd")) "")))
(define colors
(circular-list "#389600" "#8dd3c7" "#2e2a4a" "#6b8072"
"#80b1d3" "#6d2462" "#234e69" "#6c3d55"
"#d9d9d9" "#bc80bd" "#3c5b35" "#af8d2f"))
(define (not-found uri)
(list (build-response #:code 404)
(string-append "Resource not found: " (uri->string uri))))
(define (directory? filename)
(string=? filename (dirname filename)))
(define linkify-regexp
;; Rather than attempt to write a valid URL regexp, the first few
;; on-line examples of which looked suspect, exclude a few characters
;; commonly observed in practice, e.g.:
;; <nckx> Guix is great! (source: <https://guix.gnu.org>)
;; XXX This and the regexp-exec code below assume max. 1 URL per token. OK?
(make-regexp "https?://[^][><)('\",]+" regexp/icase))
(define (make-line-renderer lines)
"Return a procedure that converts a line into an SXML
representation highlighting certain parts."
(define participants
(delete-duplicates (filter-map (match-lambda
((_ nick . anything) nick)
(_ #f))
lines)
string=?))
(define (nick-color who)
(or (and=> (assoc-ref (zip participants colors) who)
first)
(first colors)))
(match-lambda
(("") '(br))
;; Filter noise.
((time "***" (or "Joins:" "Parts:" "Quits:") . msg)
'())
((time "***" . msg)
(let ((id (string-filter char-set:digit time)))
`(div (@ (class "line") (id ,id))
(span (@ (class "nick")) "***")
(span (@ (class "message notice")) ,(string-join msg)))))
((time nick . rest)
(let ((id (string-filter char-set:digit time)))
`(div (@ (class "line") (id ,id))
(span (@ (class "nick")
(style ,(string-append "color:" (nick-color nick))))
(a (@ (href ,(string-append "#" id))
(label ,time))
,nick))
(span (@ (class "message"))
,@(reverse (fold (lambda (chunk acc)
(let* ((m (regexp-exec linkify-regexp
chunk)))
(cond
((regexp-match? m)
(let ((url (match:substring m)))
(cons* " "
(match:suffix m)
`(a (@ (rel "nofollow")
(href ,url))
,url)
(match:prefix m)
" "
acc)))
(else
(match acc
(((? string? s) . rest)
(cons (string-append s " " chunk)
(cdr acc)))
(_ (cons chunk acc)))))))
'() rest))))))))
(define (render-log channel root path)
;; PATH is a list of path components
(let ((file-name (string-join (cons* root path) "/")))
(if (and (not (any (cut string-contains <> "..") path))
(file-exists? file-name)
(not (directory? file-name)))
(let* ((text (call-with-input-file file-name get-string-all))
(lines (string-split text #\newline))
(split-lines (map (cut string-split <> #\space) lines))
(handle-line (make-line-renderer split-lines)))
(render-html
`(html
(head (title "IRC channel logs")
(style ,css))
(body
(h1 "IRC channel logs")
(h2 ,path)
(p (a (@ (href ,(string-append "/" channel)))
"back to list of logs"))
(div (@ (id "logs"))
,@(map handle-line split-lines))))))
(not-found (build-uri 'http
#:host (assoc-ref %config 'host)
#:port (assoc-ref %config 'port)
#:path (string-join path "/" 'prefix))))))
(define (group-by proc files)
(define (inner init files)
(call-with-values
(lambda ()
(span (lambda (file-name)
(string=? (proc file-name) init))
files))
(lambda (group rest)
(if (null? rest)
(list group)
(cons group
(inner (proc (first rest))
rest))))))
(inner (proc (first files)) files))
(define (index channel)
(define (drop-extension file-name)
(let ((dot (string-rindex file-name #\.)))
(if dot
(substring file-name 0 dot)
file-name)))
(define (get-year file-name)
(first (string-split file-name #\-)))
(define (get-month file-name)
(second (string-split file-name #\-)))
(define group-by-year
(cut group-by get-year <>))
(define group-by-month
(cut group-by get-month <>))
`(html
(head (title ,(string-append channel " IRC channel logs"))
(style ,css))
(body
(h1 ,(string-append channel " IRC channel logs"))
(p ,(format #f "These are the channel logs for the #~a IRC channel on
Libera.Chat." channel))
(form
(@ (action ,(string-append "/" channel "/search")))
(input (@ (name "query")
(placeholder "nick:rekado frobnicate")))
(button (@ (type "submit")) ,looking-glass))
(div
(@ (class "years"))
,@(map (lambda (files)
`((h3 ,(get-year (first files)))
(div
(@ (class "year"))
,@(map (lambda (files)
`((div
(@ (class "month"))
(h4 ,(get-month (first files)))
(ul
,@(map (lambda (file)
`(li (a (@ (href ,(string-append "/" channel "/" file)))
,(drop-extension file))))
files)))))
(group-by-month files)))))
(group-by-year (channel-files channel)))))))
(define (search-results channel query)
`(html
(head (title ,(string-append "Search " channel " IRC channel logs"))
(style ,css))
(body
(h1 ,(string-append "Search " channel " IRC channel logs"))
(form
(@ (action ,(string-append "/" channel "/search")))
(input (@ (name "query")
(placeholder "nick:rekado frobnicate")))
(button (@ (type "submit")) ,looking-glass))
,@(if (and query (not (string-null? query)))
(let* ((results (search (format #f "channel:~a ~a" channel query))))
`((p "These are the channel logs matching your query " (code ,query))
(table
(tbody
,(map (lambda (result)
`(tr (td
(a (@ (href ,(string-append "/" channel "/"
(assoc-ref result 'stamp)
".log#"
(assoc-ref result 'id))))
,(assoc-ref result 'stamp)))
(td ,(assoc-ref result 'text))))
results)))))
'()))))
(define-syntax-rule (-> target functions ...)
(fold (lambda (f val) (and=> val f))
target
(list functions ...)))
(define (parse-query-string query)
"Parse and decode the URI query string QUERY and return an alist."
(let lp ((lst (map uri-decode (string-split query (char-set #\& #\=)))))
(match lst
((key value . rest)
(cons (cons key value) (lp rest)))
(() '()))))
(define (%controller request)
(match-lambda
(('GET)
(render-html (index "guix")))
(('GET (? (cut member <> (assoc-ref %config 'channels)) channel) "search")
(let ((query (-> request
request-uri
uri-query
parse-query-string
(cut assoc-ref <> "query"))))
(render-html (search-results channel query))))
(('GET (? (cut member <> (assoc-ref %config 'channels)) channel))
(render-html (index channel)))
(('GET (? (cut member <> (assoc-ref %config 'channels)) channel) path ...)
(render-log channel (string-append %log-root "/#" channel "/") path))
(('GET path ...)
(render-log "guix" (string-append %log-root "/#guix/") path))))
(define (request-path-components request)
(split-and-decode-uri-path (uri-path (request-uri request))))
(define (handler request . _)
(apply values ((%controller request)
(cons (request-method request)
(request-path-components request)))))
(define (main args)
(match args
((_ "index")
(for-each index-channel-logs
(assoc-ref %config 'channels))
#t)
(_
(let ((port (assoc-ref %config 'port))
(host (assoc-ref %config 'host)))
(run-server handler
'http
`(#:addr ,(inet-pton AF_INET host)
#:port ,port))))))