big cleanup, added fns, added print table of options used

This commit is contained in:
mousebot 2020-04-23 15:53:52 -03:00
parent 003365c3ec
commit a462e45803
1 changed file with 106 additions and 82 deletions

View File

@ -1,5 +1,11 @@
#! /usr/bin/env python3
import os
import requests
import markovify
import sys
import argparse
"""
mkv-this: input text, output markovified text.
Copyright (C) 2020 mousebot@riseup.net.
@ -17,71 +23,100 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
"""
a (very basic) script to markovify local and/or remote text files and
output a user-specified number of sentences to a local text file. see --help for other options.
a (very basic) script to markovify local and/or remote text files and
output a user-specified number of sentences to a local text file.
see --help for other options.
"""
import requests
import markovify
import sys
import argparse
def URL(insert):
    """Fetch the body of a remote text file given its URL.

    Prints a message and exits the program if the request fails for
    any reason (connection trouble, bad URL, non-2xx HTTP status).
    """
    try:
        response = requests.get(insert)
        response.raise_for_status()
    except Exception as exc:
        print(f'There was a problem: {exc}')
        sys.exit()
    # only reached on success: the except branch above never returns
    print('text fetched from URL.')
    return response.text
def main():
# argparse for cmd line args
parser = argparse.ArgumentParser(prog="mkv-this", description="markovify a local or remote text file and output the results to local text file.", epilog="may you find many prophetic énoncés in your virtual bird guts! Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial.")
parser = argparse.ArgumentParser(prog="mkv-this", description="markovify a local or remote text file and output the results to local text file.",
epilog="may you find many prophetic énoncés in your virtual bird guts! Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial.")
# positional args:
parser.add_argument('infile', help="the text file to process, with path. NB: file cannot be empty.")
parser.add_argument('outfile', nargs='?', default="./mkv-output.txt", help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-output.txt.")
parser.add_argument(
'infile', help="the text file to process, with path. NB: file cannot be empty.")
parser.add_argument('outfile', nargs='?', default="./mkv-output.txt",
help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-output.txt.")
# optional args:
parser.add_argument('-s', '--state-size', help="the number of preceeding words used to calculate the probability of the next word. defaults to 2, 1 makes it more random, 3 less so. must be an integer. anything more than 4 will likely have little effect.", type=int, default=2)
parser.add_argument('-u', '--URL', help="infile is a URL. NB: for this to work best it should be the location of a text file.", action='store_true')
parser.add_argument('-n', '--sentences', help="the number of 'sentences' to output. defaults to 5. must be an integer.", type=int, default=5)
parser.add_argument('-l', '--length', help="set maximum number of characters per sentence. must be an integer.", type=int)
parser.add_argument('-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a ratio between 0 and 1. defaults to 0.5", type=float, default=0.5)
parser.add_argument('-c', '--combine', help="provide an another input text file with path to be combined with the first item.")
parser.add_argument('-C', '--combine-URL', help="provide an additional URL to be combined with the first item")
parser.add_argument(
'-u', '--URL', help="infile is a URL. NB: for this to work best it should be the location of a text file.", action='store_true')
parser.add_argument(
'-n', '--sentences', help="the number of 'sentences' to output. defaults to 5. must be an integer.", type=int, default=5)
parser.add_argument(
'-l', '--length', help="set maximum number of characters per sentence. must be an integer.", type=int)
parser.add_argument(
'-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a ratio between 0 and 1. defaults to 0.5", type=float, default=0.5)
parser.add_argument(
'-c', '--combine', help="provide an another input text file with path to be combined with the first item.")
parser.add_argument('-C', '--combine-URL',
help="provide an additional URL to be combined with the first item")
parser.add_argument('-w', '--weight', help="specify the weight to be given to the second text provided with --combine. defaults to 1, and the weight of the initial text is also 1. setting this to 1.5 will place 50 percent more weight on the second text, while setting it to 0.5 will place less.", type=float, default=1)
#switches
parser.add_argument('-f', '--no-well-formed', help="don't enforce 'well_formed', ie allow the inclusion of sentences with []{}()""'' in them in the markov model. this might filth up your text, especially if it contains 'smart' quotes.", action='store_false') # store_false = default to True.
parser.add_argument('--newline', help="sentences in input file end with newlines rather than with full stops.", action='store_true')
# switches
parser.add_argument('-f', '--no-well-formed', help="don't enforce 'well_formed', ie allow the inclusion of sentences with []{}()""'' in them in the markov model. this might filth up your text, especially if it contains 'smart' quotes.", action='store_false')
# store_false = default to True.
parser.add_argument(
'--newline', help="sentences in input file end with newlines \
rather than with full stops.", action='store_true')
# store_true = default to False, become True if flagged.
args = parser.parse_args()
fnf = 'error: file not found. please provide a path to a really-existing file!'
# some read/build/write fns:
# NOTE(review): this duplicates the module-level URL() helper defined
# earlier in the file; one of the two copies looks redundant — verify
# which one is actually called and drop the other.
def URL(insert):
    """Fetch a remote text file; print a message and exit on any failure."""
    try:
        req = requests.get(insert)
        req.raise_for_status()
    except Exception as exc:
        # any requests failure (connection error, HTTP error status) lands here
        print(f'There was a problem: {exc}')
        sys.exit()
    else:
        print('text fetched from URL.')
        return req.text
def read(infile):
    """Return the entire contents of *infile* decoded as latin-1.

    latin-1 maps every byte to a character, so decoding never raises
    regardless of the file's real encoding.
    """
    with open(infile, encoding="latin-1") as source:
        return source.read()
def mkbtext(texttype):
    """Build a sentence-based markov model from the input string.

    Closes over argparse's `args`: uses the command-line state size,
    and `args.no_well_formed`, which is a store_false flag and so is
    True unless -f was given (i.e. well_formed is enforced by default).
    """
    return markovify.Text(texttype, state_size=args.state_size,
                          well_formed=args.no_well_formed)
def mkbnewline(texttype):
    """Build a markov model from text whose sentences end in newlines.

    This is the builder used on the --newline code path, so it must
    split input on newlines rather than full stops.
    """
    # BUG FIX: the refactor made this body identical to mkbtext()
    # (markovify.Text), which silently disabled --newline. The
    # pre-refactor code used markovify.NewlineText for this branch;
    # restore it.
    return markovify.NewlineText(texttype, state_size=args.state_size,
                                 well_formed=args.no_well_formed)
def writesent(tmodel):
    """Append one generated sentence (plus a blank line) to `output`.

    `output` and `args` are closures from main's write loop.
    """
    # NOTE(review): make_sentence() can fail to produce a sentence
    # within `tries`; str() would then write the literal "None" to the
    # output file — presumably unintended, verify.
    # NOTE(review): this helper is only called when args.length is not
    # set, so max_chars is None here; confirm make_sentence() actually
    # accepts/ignores a max_chars keyword in the markovify version used.
    return output.write(str(tmodel.make_sentence(
        tries=2000, max_overlap_ratio=args.overlap,
        max_chars=args.length)) + '\n \n')
def writeshortsent(tmodel):
    """Append one length-capped sentence (plus a blank line) to `output`.

    Used when -l/--length is given: max_chars=args.length caps the
    sentence length. `output` and `args` are closures from main's
    write loop.
    """
    # NOTE(review): as with writesent(), a failed generation yields
    # None and writes the literal "None" — verify this is acceptable.
    return output.write(str(tmodel.make_short_sentence(
        tries=2000, max_overlap_ratio=args.overlap,
        max_chars=args.length)) + '\n \n')
fnf = 'error: file not found. please provide a path to a really-existing \
file!'
# if a combine file is provided, we will combine it w infile/URL:
if args.combine or args.combine_URL:
if args.combine:
# get raw text as a string for both files:
# get raw text as a string for both files:
try:
# infile can be a URL:
if args.URL:
text = URL(args.infile)
# or normal file:
else:
with open(args.infile, encoding="latin-1") as f:
text = f.read()
text = read(args.infile)
# read combine file:
with open(args.combine, encoding="latin-1") as cf:
ctext = cf.read()
ctext = read(args.combine)
except FileNotFoundError:
print(fnf)
sys.exit()
@ -89,13 +124,12 @@ def main():
# if combine_URL is provided, we will combine it w infile/URL:
elif args.combine_URL:
try:
# infile can still be a URL:
# infile can still be a URL:
if args.URL:
text = URL(args.infile)
# or normal file:
else:
with open(args.infile, encoding="latin-1") as f:
text = f.read()
text = read(args.infile)
except FileNotFoundError:
print(fnf)
sys.exit()
@ -104,85 +138,75 @@ def main():
# build the models and build a combined model:
# with newline flagged:
if args.newline :
text_model = markovify.NewlineText(
text, state_size=args.state_size, well_formed=args.no_well_formed)
ctext_model = markovify.NewlineText(
ctext, state_size=args.state_size, well_formed=args.no_well_formed)
# no newline flag:
if args.newline:
text_model = mkbnewline(text)
ctext_model = mkbnewline(ctext)
else:
text_model = markovify.Text(text,
state_size=args.state_size, well_formed=args.no_well_formed)
ctext_model = markovify.Text(ctext,
state_size=args.state_size, well_formed=args.no_well_formed)
text_model = mkbtext(text)
ctext_model = mkbtext(ctext)
combo_model = markovify.combine([text_model, ctext_model], [1, args.weight])
combo_model = markovify.combine(
[text_model, ctext_model], [1, args.weight])
# Print -n number of randomly-generated sentences
for i in range(args.sentences):
output = open(args.outfile, 'a') # appending
# short sentence:
if args.length :
output.write(str(combo_model.make_short_sentence(
args.length, tries=2000, max_overlap_ratio=args.overlap)) + '\n \n')
if args.length:
writeshortsent(combo_model)
# normal sentence:
else:
output.write(str(combo_model.make_sentence(
tries=2000, max_overlap_ratio=args.overlap)) + '\n \n')
# add newline between each sentence.
writesent(combo_model)
output.write(str(' \n \n * \n \n'))
# add a star between each appended set.
output.close()
# if no combo file, just do normal:
# if no combine, just do normal:
else:
# Get raw text as string.
# either from a URL:
if args.URL:
text = URL(args.infile)
# or a normal local file:
text = URL(args.infile)
# or local file:
else:
try:
with open(args.infile, encoding="latin-1") as f:
text = f.read()
text = read(args.infile)
except FileNotFoundError:
print(fnf)
sys.exit()
# Build the model:
# NB: this errors if infile is EMPTY:
## newline flagged:
if args.newline :
text_model = markovify.NewlineText(text,
state_size=args.state_size, well_formed=args.no_well_formed)
# no newline flag:
# Build the model:
# if --newline:
if args.newline:
text_model = mkbnewline(text)
# no --newline:
else:
text_model = markovify.Text(text,
state_size=args.state_size, well_formed=args.no_well_formed)
text_model = mkbtext(text)
# Print -n number of randomly-generated sentences
for i in range(args.sentences):
output = open(args.outfile, 'a') # append to file
# short sentence:
if args.length :
output.write(str(text_model.make_short_sentence(
args.length, tries=2000, max_overlap_ratio=args.overlap)) + '\n \n')
if args.length:
writeshortsent(text_model)
# normal sentence:
else:
output.write(str(text_model.make_sentence(
tries=2000, max_overlap_ratio=args.overlap)) + '\n \n')
# \n to add newline between each sentence.
writesent(text_model)
output.write(str(' \n \n * \n \n'))
# add a star between each appended set.
output.close()
print('\n: The options you used are as follows:\n')
for key, value in vars(args).items():
print(key, ': ', value)
print('\n: literary genius has been written to the file ' + args.outfile + '. thanks for playing! \n\n: Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial. Yes, although your very smile suggests that this Armenian enclave is not at all the becomings that are connected...')
print(': ' + key.ljust(15, ' ') + ': ' + str(value).ljust(10))
if os.path.isfile(args.outfile):
print('\n: literary genius has been written to the file '
+ args.outfile + '. thanks for playing!\n\n: Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial. Yes, although your very smile suggests that this Armenian enclave is not at all the becomings that are connected...')
else:
print('mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
sys.exit()
# enable this for testing the file:
# enable for testing:
# main()