You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ao3_scraper/export_tags.py

205 lines
6.4 KiB

"""
Write tags to TagSpaces, wutag and xattr to use with Dolphin.
Usage:
export_tags.py [--folder=<folder>]
Options:
-f, --folder=<folder> Where to search for metadata files
"""
import json
import logging
import os
import shutil
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Iterator, List

import xattr
from docopt import docopt
# Extensions of the story files that receive tags.
FILETYPES = ['epub', 'pdf']

# Send every record from DEBUG upwards to the file "debug.log" (relative to
# the working directory) and to a stream handler.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.DEBUG,
    handlers=[logging.FileHandler("debug.log"), logging.StreamHandler()],
)
logger = logging.getLogger(__name__)
def main():
    """Parse the command line and export tags for all metadata files found.

    The folder passed with ``--folder`` is searched; when the option is
    absent (or empty) the current working directory is used instead.
    The parsed arguments are stored in the module-level ``ARGS``.
    """
    global ARGS
    ARGS = docopt(__doc__)
    # Fall back to the working directory when no folder was given.
    folder = ARGS['--folder'] or os.getcwd()
    process_files_not_in_hidden_folder(folder)
    # NOTE(review): the folders yielded by find_leaf_dirs lie inside the tree
    # that the walk above already covered, so their metadata files look like
    # they are exported a second time - confirm this is intended.
    process_files_in_leaf_dir(find_leaf_dirs(Path(folder)))
def process_files_not_in_hidden_folder(path: str) -> None:
    """Export tags for every ``.json`` file below *path*, skipping hidden folders.

    Args:
        path: Root folder of the walk. Sub-folders whose name starts with a
            dot are pruned and never entered.
    """
    for current_dir, child_dirs, file_names in os.walk(path):
        # Prune in place: os.walk only descends into what is left in the list.
        child_dirs[:] = [name for name in child_dirs if not name.startswith('.')]
        for name in file_names:
            if not name.endswith('.json'):
                continue
            meta_file = os.path.join(current_dir, name)
            logger.debug(f"Processing {meta_file}")
            export_tags(meta_file)
def process_files_in_leaf_dir(leaves: List[Path]) -> None:
    """Export tags for each ``*.json`` file located directly in the given folders.

    Args:
        leaves: Folders to inspect; only their direct children are matched,
            sub-folders are not searched.
    """
    # Lazy on purpose: folders and files are pulled one at a time, exactly
    # as nested loops would do.
    meta_files = (
        candidate
        for leaf in leaves
        for candidate in leaf.glob("*.json")
    )
    for meta_file in meta_files:
        logger.debug(f"Processing {meta_file}")
        export_tags(meta_file)
def find_leaf_dirs(root_path: Path) -> Iterator[Path]:
    """Yield every leaf folder at or below *root_path*, ignoring hidden folders.

    A folder is a leaf when it has no visible (non-dot) sub-folder. Hidden
    sub-folders are discarded *before* that test, so a folder whose only
    sub-folder is hidden (for example a ``.ts`` sidecar folder) is still
    reported as a leaf instead of being silently dropped.

    Args:
        root_path: Folder to start from. It is yielded itself when it has
            no visible sub-folder.

    Yields:
        Each leaf folder found, in the order ``Path.iterdir`` returns them.
    """
    visible_children = [
        child
        for child in root_path.iterdir()
        if child.is_dir() and not child.name.startswith(".")
    ]
    # Nothing visible below: this folder is a leaf.
    if not visible_children:
        yield root_path
        return
    # Otherwise descend into each visible sub-folder.
    for child in visible_children:
        yield from find_leaf_dirs(child)
def export_tags(meta_file: str, filetypes: List[str] = FILETYPES) -> None:
    """Read one metadata file and pass its tags to every available tagging tool.

    TagSpaces, wutag, TMSU and supertag (``tag``) are only used when their
    executable is found by ``shutil.which``. The Dolphin extended
    attributes are always written.

    Args:
        meta_file: Path of the JSON metadata file; the story files are
            looked up in its parent folder.
        filetypes: File extensions to tag.
    """
    parent_folder = Path(meta_file).parent
    metadata = get_metadata(meta_file)
    tags = get_tags_from_metadata(metadata)

    def installed(program: str) -> bool:
        return shutil.which(program) is not None

    if installed('tagspaces'):
        write_tagspaces(parent_folder, metadata, tags, filetypes=filetypes)
    # xattr is an imported Python module, so there is no executable to probe.
    write_dolphin_xattr(parent_folder, metadata, tags, filetypes=filetypes)
    if installed('wutag'):
        write_wutag_tags(parent_folder, tags, filetypes=filetypes)
    if installed('tmsu'):
        write_tmsu_tags(parent_folder, metadata, tags, filetypes=filetypes)
    if installed('tag'):
        write_supertag_tags(parent_folder, tags, filetypes=filetypes)
def get_metadata(meta_file: str) -> Dict:
    """Load the JSON metadata stored in *meta_file*.

    The file is decoded as UTF-8 explicitly so that non-ASCII titles and
    tags are read the same way regardless of the system locale.

    Args:
        meta_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content.
    """
    with open(meta_file, encoding='utf-8') as json_file:
        return json.load(json_file)
def get_tags_from_metadata(meta: Dict) -> List[str]:
    """Collect the tag strings to export from a metadata dictionary.

    Entries with a falsy value (``None``, empty string, empty list, ``0``)
    are skipped. The result follows the key order of *meta*.

    Args:
        meta: Metadata as loaded from the JSON file.

    Returns:
        The values of the descriptive keys (lists are flattened), plus
        ``"<score> stars"`` for ``score`` and the part of ``date_updated``
        before the first ``-``.
    """
    # Keys whose values are used as tags unchanged.
    plain_keys = {
        'fandoms', 'rating', 'categories', 'warnings', 'status',
        'relationships', 'characters', 'tags', 'language',
    }
    tags = []
    for key, value in meta.items():
        if not value:
            continue
        if key in plain_keys:
            if isinstance(value, list):
                tags.extend(value)
            else:
                tags.append(value)
        elif key == 'score':
            # Format instead of concatenating so a numeric score does not
            # raise TypeError.
            tags.append(f'{value} stars')
        elif key == 'date_updated':
            tags.append(str(value).split('-')[0])
    return tags
def create_tagspaces_dict(tags: List[str]) -> Dict:
    """Build the dictionary written to a TagSpaces sidecar file.

    Args:
        tags: Tag titles; each one becomes an entry with fixed colours and
            the type ``sidecar``.

    Returns:
        A dictionary with the keys ``tags``, ``appName``, ``appVersion``
        and ``lastUpdated`` (millisecond-precision UTC timestamp).
    """
    # The trailing 'Z' is the ISO 8601 designator for UTC, so the time has
    # to be taken in UTC rather than as naive local time.
    now_utc = datetime.now(timezone.utc)
    return {
        'tags': [
            {
                'title': tag,
                'color': '#444444',
                'textcolor': '#ffffff',
                'type': 'sidecar',
            }
            for tag in tags
        ],
        'appName': 'TagSpaces',
        'appVersion': '4.4.3',
        # %f yields microseconds; drop the last three digits for milliseconds.
        'lastUpdated': now_utc.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z',
    }
def write_tagspaces(parent_folder: str, metadata: Dict, tags: List[str], filetypes: List[str] = FILETYPES) -> None:
    """
    Write one TagSpaces sidecar file per file type.

    The sidecar is stored as ``<parent_folder>/.ts/<title>.<filetype>.json``;
    the ``.ts`` folder is created when missing.

    NOTE(review): the name is derived from ``metadata['title']`` while the
    other writers in this file use the folder name - confirm both always
    match the real story file name.
    """
    sidecar_content = create_tagspaces_dict(tags)
    for extension in filetypes:
        sidecar_path = os.path.join(
            parent_folder, '.ts', f"{metadata['title']}.{extension}.json"
        )
        os.makedirs(os.path.dirname(sidecar_path), exist_ok=True)
        write_json(sidecar_path, sidecar_content)
def write_json(filename: str, d: Dict) -> None:
    """Serialise *d* as JSON and write it to *filename* (UTF-8).

    Args:
        filename: Path of the file to create or overwrite.
        d: JSON-serialisable data.

    Raises:
        TypeError: If *d* contains a value ``json`` cannot serialise. The
            target file is left untouched in that case.
    """
    # Serialise before opening: opening with 'w' truncates the file, so a
    # failure during encoding would otherwise leave a broken file behind.
    payload = json.dumps(d)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(payload)
def write_wutag_tags(parent_folder: str, tags: List[str], filetypes: List[str] = FILETYPES) -> None:
    """Run ``wutag set`` for each story file in *parent_folder*.

    The story file is expected to be named ``<folder name>.<filetype>``.
    The command runs with *parent_folder* as working directory and its
    exit status is not checked.
    """
    base_name = os.path.basename(parent_folder)
    for extension in filetypes:
        target = base_name + '.' + extension
        subprocess.run(['wutag', 'set', target, *tags], cwd=parent_folder)
def write_tmsu_tags(parent_folder: str, metadata: Dict, tags: List[str], filetypes: List[str] = FILETYPES) -> None:
    """Run ``tmsu tag`` for each story file in *parent_folder*.

    Besides *tags*, a ``year=<YYYY>`` argument is appended when the
    metadata has a ``date_updated`` entry. The story file is expected to
    be named ``<folder name>.<filetype>``; the exit status is not checked.
    """
    # Zero or one extra argument, depending on whether a date is available.
    year_argument = []
    if 'date_updated' in metadata:
        year_argument.append(f"year={metadata['date_updated'].split('-')[0]}")

    base_name = os.path.basename(parent_folder)
    for extension in filetypes:
        target = base_name + '.' + extension
        subprocess.run(['tmsu', 'tag', target, *tags, *year_argument], cwd=parent_folder)
def write_supertag_tags(parent_folder: str, tags: List[str], filetypes: List[str] = FILETYPES) -> None:
    """Run ``tag ln`` for each story file, passing the tags as one ``/``-joined path.

    The story file is expected to be named ``<folder name>.<filetype>``.
    The command runs with *parent_folder* as working directory and its
    exit status is not checked.

    NOTE(review): a tag that itself contains ``/`` adds extra path
    segments to the joined string - confirm how supertag treats that.
    """
    tag_path = "/".join(tags)
    base_name = os.path.basename(parent_folder)
    for extension in filetypes:
        target = base_name + '.' + extension
        subprocess.run(['tag', 'ln', target, tag_path], cwd=parent_folder)
def write_dolphin_xattr(parent_folder: str, metadata: Dict, tags: List[str], filetypes: List[str] = FILETYPES) -> None:
    """Store tags, comment and rating as extended attributes on each story file.

    The attributes written are ``user.xdg.tags`` (comma-joined tags),
    ``user.xdg.comment`` (``metadata['comment']``) and ``user.baloo.rating``
    (``metadata['score']``). Writing is best effort: a missing metadata
    entry is skipped and a failing attribute write is logged, never raised.

    Args:
        parent_folder: Folder holding the story files.
        metadata: Metadata dictionary; ``score`` and ``comment`` are optional.
        tags: Tag strings to store.
        filetypes: File extensions to process.
    """
    # Join once, under its own name. Rebinding ``tags`` inside the loop made
    # every file type after the first join the characters of the already
    # joined string ("a,b" -> "a,,,b").
    joined_tags = ",".join(tags)
    attributes = (
        ('user.xdg.tags', joined_tags),
        ('user.xdg.comment', metadata.get('comment')),
        ('user.baloo.rating', metadata.get('score')),
    )
    for filetype in filetypes:
        filepath = get_filepath(parent_folder, filetype)
        for name, value in attributes:
            _set_xattr_best_effort(filepath, name, value)


def _set_xattr_best_effort(filepath: str, name: str, value) -> None:
    """Set one extended attribute; log instead of raising when it fails."""
    if value is None:
        return
    try:
        # str() lets a numeric score be stored as well.
        xattr.setxattr(filepath, name, str(value).encode('utf-8'))
    except (OSError, UnicodeError) as err:
        # Expected e.g. when the file for this type does not exist.
        logger.debug("Could not set %s on %s: %s", name, filepath, err)
def get_filepath(parent_folder: str, filetype: str) -> str:
    """Return the path of the story file of the given type inside *parent_folder*.

    The file name is the last component of *parent_folder* followed by
    ``.`` and *filetype*.
    """
    folder_name = os.path.basename(parent_folder)
    return os.path.join(parent_folder, ".".join((folder_name, filetype)))
# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()