mkv-this/mkv_this/mkv_this.py

251 lines
8.4 KiB
Python
Executable File

#! /usr/bin/env python3
"""
mkv-this: input text and/or url, output markovified text.
Copyright (C) 2020 martianhiatus@riseup.net.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
import markovify
import os
import sys
import argparse
from .functions import (
URL,
convert_html,
dir_list,
dir_cat,
read,
mkbtext,
mkbnewline,
writesentence,
writeshortsentence,
)
# argparse
def parse_the_args():
parser = argparse.ArgumentParser(
prog="mkv-this",
description="markovify local text files or URLs and output the results to a local text file.",
epilog="may you find many prophetic énoncés in your virtual bird guts! Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial.",
)
# positional args:
parser.add_argument(
"infile", help="the text file to process. NB: file cannot be empty."
)
parser.add_argument(
"outfile",
nargs="?",
default="./mkv-output.txt",
help="the file to save to. if the file is used more than once, subsequent literature will be appended to it. defaults to ./mkv-output.txt.",
)
# optional args:
parser.add_argument(
"-s",
"--state-size",
help="the number of preceeding words used to calculate the probability of the next word. defaults to 2, 1 makes it more random, 3 less so. > 4 will likely have little effect.",
type=int,
default=2,
)
parser.add_argument(
"-n",
"--sentences",
help="the number of 'sentences' to output. defaults to 5. NB: if your text has no initial caps, a 'sentence' will be a paragraph.",
type=int,
default=5,
)
parser.add_argument(
"-l",
"--length",
help="set maximum number of characters per sentence.",
type=int,
)
parser.add_argument(
"-o",
"--overlap",
help="the amount of overlap allowed between original and output, expressed as a ratio between 0 and 1. defaults to 0.5",
type=float,
default=0.5,
)
parser.add_argument(
"-c",
"--combine",
help="provide an another text file to be combined with the first item.",
)
parser.add_argument(
"-C", "--combine-URL", help="provide a URL to be combined with the first item"
)
parser.add_argument(
"-w",
"--weight",
help="specify the weight to be given to the text provided with -c or -C. defaults to 1, and the weight of the initial text is 1. 1.5 will place more weight on the second text, 0.5 will place less.",
type=float,
default=1,
)
# switches
parser.add_argument(
"-u", "--URL", help="infile is a URL instead.", action="store_true"
)
parser.add_argument(
"-d", "--directory", help="infile is a directory instead.", action="store_true"
)
parser.add_argument(
"-f",
"--well-formed",
help="enforce 'well_formed': discard sentences containing []{}()"
"'' from the markov model. use if output is filthy.",
action="store_true",
)
# store_true = default to False.
parser.add_argument(
"--newline",
help="sentences in input file end with newlines \
rather than full stops.",
action="store_true",
)
# store_true = default to False, become True if flagged.
return parser.parse_args()
# make args avail:
args = parse_the_args()
def main():
# if a -c/-C, combine it w infile/URL:
if args.combine or args.combine_URL:
if args.combine:
# get raw text as a string for both:
# infile is URL:
if args.URL:
html = URL(args.infile)
text = convert_html(html)
# infile is dir:
elif args.directory:
matchlist = dir_list(args.infile)
# place batchfile.txt in user-given directory:
batchfile = os.path.dirname(args.infile) + os.path.sep + "batchfile.txt"
dir_cat(matchlist, batchfile)
text = read(batchfile)
os.unlink(batchfile)
# or normal:
else:
text = read(args.infile)
# read -c file:
ctext = read(args.combine)
# if -C, combine it w infile/URL:
elif args.combine_URL:
# infile is URL:
if args.URL:
html = URL(args.infile)
text = convert_html(html)
# infile is dir:
elif args.directory:
matchlist = dir_list(args.infile)
# place batchfile.txt in user-given directory:
batchfile = os.path.dirname(args.infile) + os.path.sep + "batchfile.txt"
dir_cat(matchlist, batchfile)
text = read(batchfile)
# or normal:
else:
text = read(args.infile)
# now combine_URL:
html = URL(args.combine_URL)
ctext = convert_html(html)
# build the models + a combined model:
# with --newline:
if args.newline:
text_model = mkbnewline(text, args.state_size, args.well_formed)
ctext_model = mkbnewline(ctext, args.state_size, args.well_formed)
# no --newline:
else:
text_model = mkbtext(text, args.state_size, args.well_formed)
ctext_model = mkbtext(ctext, args.state_size, args.well_formed)
combo_model = markovify.combine([text_model, ctext_model], [1, args.weight])
# write it combo!
if args.length:
writeshortsentence(
combo_model, args.sentences, args.outfile, args.overlap, args.length
)
else:
writesentence(
combo_model, args.sentences, args.outfile, args.overlap, args.length
)
# if no -c/-C, do normal:
else:
# Get raw text as string.
# either URL:
if args.URL:
html = URL(args.infile)
text = convert_html(html)
elif args.directory:
matchlist = dir_list(args.infile)
# place batchfile.txt in user-given directory:
batchfile = os.path.dirname(args.infile) + os.path.sep + "batchfile.txt"
dir_cat(matchlist, batchfile)
text = read(batchfile)
# or local:
else:
text = read(args.infile)
# Build the model:
# if --newline:
if args.newline:
text_model = mkbnewline(text, args.state_size, args.well_formed)
# no --newline:
else:
text_model = mkbtext(text, args.state_size, args.well_formed)
# write it!
if args.length:
writeshortsentence(
text_model, args.sentences, args.outfile, args.overlap, args.length
)
else:
writesentence(
text_model, args.sentences, args.outfile, args.overlap, args.length
)
print("\n: :\n")
for key, value in vars(args).items():
print(": " + key.ljust(15, " ") + ": " + str(value).ljust(10))
if os.path.isfile(args.outfile):
print(
"\n: literary genius has been written to the file "
+ args.outfile
+ ". thanks for playing!\n\n: 'Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial. Yes, although your very smile suggests that this Armenian enclave is not at all the becomings that are connected...'"
)
else:
print(
": mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!"
)
sys.exit()
if __name__ == "__main__":
main()