Merge branch 'testing'

This commit is contained in:
mousebot 2020-04-24 19:12:48 -03:00
commit 963a2d8402
3 changed files with 90 additions and 39 deletions

View File

@ -28,6 +28,7 @@ import requests
import markovify
import sys
import argparse
import html2text
# argparse
def parse_the_args():
@ -36,9 +37,9 @@ def parse_the_args():
# positional args:
parser.add_argument(
'infile', help="the text file to process, with path. NB: file cannot be empty.")
'infile', help="the text file to process. NB: file cannot be empty.")
parser.add_argument('outfile', nargs='?', default="./mkv-output.txt",
help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-output.txt.")
help="the file to save to. if the file is used more than once, subsequent literature will be appended to it. defaults to ./mkv-output.txt.")
# optional args:
parser.add_argument('-s', '--state-size', help="the number of preceeding words used to calculate the probability of the next word. defaults to 2, 1 makes it more random, 3 less so. > 4 will likely have little effect.", type=int, default=2)
@ -51,12 +52,12 @@ def parse_the_args():
parser.add_argument(
'-c', '--combine', help="provide an another text file to be combined with the first item.")
parser.add_argument('-C', '--combine-URL',
help="provide an additional URL to be combined with the first item")
help="provide a URL to be combined with the first item")
parser.add_argument('-w', '--weight', help="specify the weight to be given to the text provided with -c or -C. defaults to 1, and the weight of the initial text is 1. 1.5 will place more weight on the second text, 0.5 will place less.", type=float, default=1)
# switches
parser.add_argument(
'-u', '--URL', help="infile is a URL. NB: for this to work it should be the location of a text file.", action='store_true')
'-u', '--URL', help="infile is a URL instead.", action='store_true')
parser.add_argument('-f', '--no-well-formed', help="don't enforce 'well_formed': allow the inclusion of sentences containing []{}()""'' in the markov model. might filth up your text, eg if it contains 'smart' quotes.", action='store_false')
# store_false = default to True.
parser.add_argument(
@ -66,7 +67,7 @@ def parse_the_args():
return parser.parse_args()
# read/build/write fns:
# fetch/read/build/write fns:
def URL(insert):
@ -74,13 +75,21 @@ def URL(insert):
req = requests.get(insert)
req.raise_for_status()
except Exception as exc:
print(f'There was a problem: {exc}')
print(f': There was a problem: {exc}.\n: Please enter a valid URL')
sys.exit()
else:
print('text fetched from URL.')
print(': fetched URL.')
return req.text
def convert_html(html):
    """Convert an HTML string to text using html2text.

    Args:
        html: the HTML markup to convert (e.g. the body returned by URL()).

    Returns:
        Whatever ``HTML2Text.handle`` produces for ``html``.
    """
    converter = html2text.HTML2Text()
    # Ask html2text to leave link and image markup out of the output.
    converter.ignore_images = True
    converter.ignore_links = True
    # NB: the status line is emitted before the conversion actually runs.
    print(': URL converted to text')
    plain_text = converter.handle(html)
    return plain_text
def read(infile):
try:
with open(infile, encoding="utf-8") as f:
@ -122,7 +131,7 @@ def writesentence(tmodel):
# make args + fnf avail to all:
args = parse_the_args()
fnf = 'error: file not found. please provide a path to a really-existing \
fnf = ': error: file not found. please provide a path to a really-existing \
file!'
@ -134,30 +143,27 @@ def main():
# try:
# infile is URL:
if args.URL:
text = URL(args.infile)
html = URL(args.infile)
text = convert_html(html)
# or normal:
else:
text = read(args.infile)
# read -c file:
ctext = read(args.combine)
# except FileNotFoundError:
# print(fnf)
# sys.exit()
# if -C, combine it w infile/URL:
elif args.combine_URL:
# try:
# infile is URL:
if args.URL:
text = URL(args.infile)
html = URL(args.infile)
text = convert_html(html)
# or normal:
else:
text = read(args.infile)
# except FileNotFoundError:
# print(fnf)
# sys.exit()
# now combine_URL:
ctext = URL(args.combine_URL)
html = URL(args.combine_URL)
ctext = convert_html(html)
# build the models + a combined model:
# with --newline:
@ -179,14 +185,11 @@ def main():
# Get raw text as string.
# either URL:
if args.URL:
text = URL(args.infile)
html = URL(args.infile)
text = convert_html(html)
# or local:
else:
# try:
text = read(args.infile)
# except FileNotFoundError:
# print(fnf)
# sys.exit()
# Build the model:
# if --newline:
@ -202,10 +205,10 @@ def main():
for key, value in vars(args).items():
print(': ' + key.ljust(15, ' ') + ': ' + str(value).ljust(10))
if os.path.isfile(args.outfile):
print('\n: literary genius has been written to the file '
+ args.outfile + '. thanks for playing!\n\n: Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial. Yes, although your very smile suggests that this Armenian enclave is not at all the becomings that are connected...')
print("\n: literary genius has been written to the file "
+ args.outfile + ". thanks for playing!\n\n: 'Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial. Yes, although your very smile suggests that this Armenian enclave is not at all the becomings that are connected...'")
else:
print('mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
print(': mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
sys.exit()

View File

@ -25,7 +25,8 @@ import os
import markovify
import sys
import argparse
import html2text
import requests
# argparse
def parse_the_args():
@ -41,7 +42,7 @@ def parse_the_args():
parser.add_argument('-n', '--sentences', help="the number of 'sentences' to output. defaults to 5.", type=int, default=5)
parser.add_argument('-l', '--length', help="set maximum number of characters per sentence.", type=int)
parser.add_argument('-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a radio between 0 and 1. lower values make it more random. defaults to 0.5", type=float, default=0.5)
parser.add_argument('-c', '--combine', help="provide an another input text file with path to be combined with the input directory.")
parser.add_argument('-C', '--combine-URL', help="provide a URL to be combined with the input dir")
parser.add_argument('-w', '--weight', help="specify the weight to be given to the second text provided with --combine. defaults to 1, and the weight of the initial text is also 1. setting this to 1.5 will place 50 percent more weight on the second text. setting it to 0.5 will place less.", type=float, default=1)
# switches
@ -52,7 +53,29 @@ def parse_the_args():
return parser.parse_args()
# read, build, write fns:
# fetch, read, build, write fns:
def URL(insert, timeout=30):
    """Fetch a URL and return the response body as text.

    Args:
        insert: the URL to request.
        timeout: seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on an
            unresponsive host.

    Returns:
        The decoded response body (``Response.text``).

    Exits:
        Prints a diagnostic and terminates the program with a non-zero
        status if the request fails or the server answers with an HTTP
        error status.
    """
    try:
        req = requests.get(insert, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they are reported below.
        req.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # RequestException is the base class of everything requests raises
        # (connection errors, timeouts, invalid URLs, HTTP errors); anything
        # else is a programming error and should surface as a traceback.
        print(f': There was a problem: {exc}.\n: Please enter a valid URL')
        # Non-zero status so shells and scripts can detect the failure.
        sys.exit(1)
    print(': fetched URL.')
    return req.text
def convert_html(html):
    """Convert an HTML string to text using html2text.

    Args:
        html: the HTML markup to convert (e.g. the body returned by URL()).

    Returns:
        Whatever ``HTML2Text.handle`` produces for ``html``.
    """
    converter = html2text.HTML2Text()
    # Ask html2text to leave link and image markup out of the output.
    converter.ignore_images = True
    converter.ignore_links = True
    # NB: the status line is emitted before the conversion actually runs.
    print(': URL converted to text')
    plain_text = converter.handle(html)
    return plain_text
def read(infile):
try:
with open(infile, encoding="utf-8") as f:
@ -100,8 +123,10 @@ def main():
for filename in filenames:
if filename.endswith(('.txt', '.org', '.md')):
matches.append(os.path.join(root, filename))
print(': text files fetched and combined')
else:
print('error: please enter a valid directory')
print(': error: please enter a valid directory')
sys.exit()
# place batchfile.txt in user-given directory:
batchfile = os.path.dirname(args.indir) + os.path.sep + 'batchfile.txt'
@ -119,16 +144,38 @@ def main():
# Get raw text from batchfile as string.
text = read(batchfile)
# Build model:
# if --newline:
if args.newline:
text_model = mkbnewline(text)
# no --newline:
else:
text_model = mkbtext(text)
writesentence(text_model)
if args.combine_URL:
html = URL(args.combine_URL)
ctext = convert_html(html)
# Build combo model:
# if --newline:
if args.newline:
text_model = mkbnewline(text)
ctext_model = mkbnewline(ctext)
# no --newline:
else:
text_model = mkbtext(text)
ctext_model = mkbtext(ctext)
combo_model = markovify.combine(
[text_model, ctext_model], [1, args.weight])
writesentence(combo_model)
# no combining:
else:
# Build model:
# if --newline:
if args.newline:
text_model = mkbnewline(text)
# no --newline:
else:
text_model = mkbtext(text)
writesentence(text_model)
os.unlink(batchfile)
print('\n: The options you used are as follows:\n')
@ -138,7 +185,7 @@ def main():
print('\n: literary genius has been written to the file '
+ args.outfile + '. thanks for playing!\n\n: Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial. Yes, although your very smile suggests that this Armenian enclave is not at all the becomings that are connected...')
else:
print('mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
print(': mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
sys.exit()

View File

@ -25,6 +25,7 @@ setup(name='mkv-this',
install_requires=[
'markovify',
'argparse',
'html2text',
],
zip_safe=False,
)