# csschooser/csschooser.py

from bs4 import BeautifulSoup
import os
import re
import requests
from rich.console import Console
from rich.highlighter import RegexHighlighter
from rich.theme import Theme
import sys
import validators
# Fetch the file or URL as prettyprinted HTML
# Selector editor
# Highlight matching elements
# Tune the selector
# Return selector as output
# Number of the next capture group that get_regex() will create.
# ClassHighlighter wraps all element patterns as "(?P<elements>( ... ))", which
# uses groups 1 and 2, so the first per-element "(\s*)" group is number 3.
regex_count = 3
def get_soup(name):
    """Load HTML from a URL or a local file and parse it with BeautifulSoup.

    When ``validators.url`` accepts *name*, the page is downloaded, its line
    endings are normalised to ``\\n`` and every ``<script>``/``<style>``
    element is removed before parsing.  Otherwise *name* is opened as a local
    file and its stripped content is parsed unchanged.

    Args:
        name: URL or filesystem path of the HTML document.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        FileNotFoundError: if the document cannot be fetched, read or parsed.
            The underlying error is attached as ``__cause__``.
    """
    is_url = validators.url(name)
    try:
        if is_url:
            # A timeout keeps an unresponsive server from hanging the tool.
            html = requests.get(name, timeout=30).text
            html = html.replace("\r\n", "\n").replace("\n\r", "\n").strip()
            html = re.sub(r"\<(script|style)[\s\S]*?\<\/\1\>", "", html)
        else:
            with open(name, "r") as f:
                html = f.read().strip()
        return BeautifulSoup(html, "html.parser")
    except Exception as err:
        # Callers only distinguish "could not load"; keep that contract but
        # let KeyboardInterrupt/SystemExit through and preserve the cause.
        raise FileNotFoundError(name) from err
def clear(lines=1, out=True):
    """Erase terminal output.

    Args:
        lines: Number of lines to erase, counted upwards from the cursor.
            Any value below 1 clears the whole screen instead, using the
            platform's ``cls``/``clear`` command.
        out: When true, write the escape sequence to stdout; when false, only
            build and return it.

    Returns:
        ``False`` after a full-screen clear, otherwise the escape sequence
        (cursor-up followed by erase-line, repeated *lines* times).
    """
    if lines < 1:
        os.system('cls' if os.name == 'nt' else 'clear')
        return False
    up = '\033[1A'
    erase = '\x1b[2K'
    # Build the sequence once instead of concatenating inside a loop.
    sequence = (up + erase) * lines
    if out:
        print(sequence, end="")
    return sequence
def get_regex(s):
    """Build a regex fragment that matches one selected element.

    The first and last tags found in ``str(s)`` are treated as the element's
    opening and closing tags.  When they differ, the result matches from the
    opening tag to the closing tag and requires the closing tag to be preceded
    by the same whitespace that was captured before the opening tag.  That
    capture uses group number ``regex_count``, which is then incremented.

    When there are no tags, or only a single tag (opening and closing tags are
    identical), the text is matched literally.

    Args:
        s: Element (or any object) whose ``str()`` is the markup to match.

    Returns:
        A regex pattern string.
    """
    global regex_count
    text = str(s)
    tags = re.findall(r"(<[^\>\<]*?>)", text)
    if not tags or tags[0] == tags[-1]:
        # Always escape: attribute values may contain regex metacharacters
        # such as "(", "+" or "?" that would otherwise break the pattern.
        return re.escape(text)
    opening, closing = re.escape(tags[0]), re.escape(tags[-1])
    pattern = r"(\s*){}[\s\S]*?\{}{}".format(opening, regex_count, closing)
    # One capture group was added, so the next pattern must use the next number.
    regex_count += 1
    return pattern
def paginate(console, pretty):
    """Show *pretty* through the console's pager with styles enabled."""
    pager = console.pager(styles=True)
    with pager:
        console.print(pretty)
def interactive_select(soup):
    """Run the interactive selector loop and return the chosen selector.

    On every pass the prettified document is printed with the elements
    matching the current selector highlighted, then a new selector is read
    from stdin.  An empty input ends the loop.

    Args:
        soup: Parsed document to display and select from.

    Returns:
        The selector that was active before the empty input was entered
        (an empty string if no selector was ever entered).
    """
    global regex_count
    theme = Theme({'selector.elements': 'blue', 'code': 'none', 'reverse': 'none'})
    selector = "null"
    entered = ""
    hint = ""
    is_first_pass = True
    print()
    while is_first_pass or entered:
        highlighter = ClassHighlighter(soup, sel=selector)
        console = Console(highlighter=highlighter, theme=theme)
        previous_render = "" if is_first_pass else rendered
        rendered = soup.prettify()
        line_count = len(rendered.split("\n"))
        if line_count > os.get_terminal_size()[1]:
            # Too tall for the terminal: page it, then start from a clean screen.
            paginate(console, rendered)
            clear(-1)
        elif not is_first_pass:
            # Erase the previous render plus the blank line and the prompt line.
            clear(len(previous_render.split("\n")) + 2)
        console.print(rendered)
        is_first_pass = False
        hint = " [Leave empty to exit]"
        previous = entered
        entered = input(f"\nSelector ({entered}){hint}: ")
        selector = entered
        # Group numbering restarts for the highlighter built on the next pass.
        regex_count = 3
    clear(-1)
    return previous
def main():
    """Ask for an HTML source, run the selector session and print the result.

    Exits with the message ``Invalid filename or URL!`` when the source cannot
    be loaded.  Errors raised later, during the interactive session, are not
    reported as a bad filename; they propagate unchanged.
    """
    name = input("Filename or URL: ")
    try:
        soup = get_soup(name)
    except FileNotFoundError:
        # get_soup signals every load failure with FileNotFoundError.
        sys.exit("Invalid filename or URL!")
    old = interactive_select(soup)
    print()
    print(f"You chose: {old}")
class ClassHighlighter(RegexHighlighter):
    """Highlight the elements of a document that match a CSS selector.

    The highlighter is built from the elements returned by
    ``soup.select(sel)``: each one is turned into a pattern by ``get_regex``
    and all patterns are combined into one alternation inside the named group
    ``elements``, which is styled as ``selector.elements``.
    """

    base_style = "selector."

    def __init__(self, soup, sel="null"):
        """Build the highlight pattern for *sel* applied to *soup*.

        Args:
            soup: Parsed document providing ``select``.
            sel: CSS selector; the default ``"null"`` is used before the user
                has entered one.
        """
        super().__init__()
        matches = soup.select(sel)
        if not matches:
            # Nothing selected: skip installing a pattern that can only
            # produce empty matches.
            self.highlights = []
            return
        # get_regex numbers its capture groups in call order, so the patterns
        # must be generated sequentially, and the two wrapping groups must
        # stay in place.
        alternatives = "|".join(get_regex(element) for element in matches)
        self.highlights = [r"(?P<elements>" + "(" + alternatives + ")" + ")"]
# Run the interactive tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()