Merge branch 'testing'
This commit is contained in:
commit
41816a00b5
|
@ -25,6 +25,7 @@ import sys
|
||||||
import argparse
|
import argparse
|
||||||
from .functions import URL, convert_html, read, mkbtext, mkbnewline, writesentence, writeshortsentence
|
from .functions import URL, convert_html, read, mkbtext, mkbnewline, writesentence, writeshortsentence
|
||||||
|
|
||||||
|
|
||||||
# argparse
|
# argparse
|
||||||
def parse_the_args():
|
def parse_the_args():
|
||||||
parser = argparse.ArgumentParser(prog="mkv-this", description="markovify local text files or URLs and output the results to a local text file.",
|
parser = argparse.ArgumentParser(prog="mkv-this", description="markovify local text files or URLs and output the results to a local text file.",
|
||||||
|
@ -53,7 +54,9 @@ def parse_the_args():
|
||||||
# switches
|
# switches
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-u', '--URL', help="infile is a URL instead.", action='store_true')
|
'-u', '--URL', help="infile is a URL instead.", action='store_true')
|
||||||
parser.add_argument('-f', '--well-formed', help="enforce 'well_formed': discard sentences containing []{}()""'' from the markov model. use if output is filthy.", action='store_true') # store_false = default to True.
|
# store_false = default to True.
|
||||||
|
parser.add_argument('-f', '--well-formed',
|
||||||
|
help="enforce 'well_formed': discard sentences containing []{}()""'' from the markov model. use if output is filthy.", action='store_true')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--newline', help="sentences in input file end with newlines \
|
'--newline', help="sentences in input file end with newlines \
|
||||||
rather than full stops.", action='store_true')
|
rather than full stops.", action='store_true')
|
||||||
|
@ -109,9 +112,11 @@ def main():
|
||||||
|
|
||||||
# write it combo!
|
# write it combo!
|
||||||
if args.length:
|
if args.length:
|
||||||
writeshortsentence(combo_model, args.sentences, args.outfile, args.overlap, args.length)
|
writeshortsentence(combo_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
else:
|
else:
|
||||||
writesentence(combo_model, args.sentences, args.outfile, args.overlap, args.length)
|
writesentence(combo_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
|
|
||||||
# if no -c/-C, do normal:
|
# if no -c/-C, do normal:
|
||||||
else:
|
else:
|
||||||
|
@ -134,9 +139,11 @@ def main():
|
||||||
|
|
||||||
# write it!
|
# write it!
|
||||||
if args.length:
|
if args.length:
|
||||||
writeshortsentence(text_model, args.sentences, args.outfile, args.overlap, args.length)
|
writeshortsentence(text_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
else:
|
else:
|
||||||
writesentence(text_model, args.sentences, args.outfile, args.overlap, args.length)
|
writesentence(text_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
|
|
||||||
print('\n: :\n')
|
print('\n: :\n')
|
||||||
for key, value in vars(args).items():
|
for key, value in vars(args).items():
|
||||||
|
|
|
@ -24,27 +24,36 @@ import sys
|
||||||
import argparse
|
import argparse
|
||||||
from .functions import URL, convert_html, read, mkbtext, mkbnewline, writesentence, writeshortsentence
|
from .functions import URL, convert_html, read, mkbtext, mkbnewline, writesentence, writeshortsentence
|
||||||
|
|
||||||
|
|
||||||
# argparse
|
# argparse
|
||||||
def parse_the_args():
|
def parse_the_args():
|
||||||
parser = argparse.ArgumentParser(prog="mkv-this-dir", description="markovify all text files in a director and output the results to a text file.",
|
parser = argparse.ArgumentParser(prog="mkv-this-dir", description="markovify all text files in a director and output the results to a text file.",
|
||||||
epilog="may you find many prophetic énoncés in your virtual bird guts! Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial.")
|
epilog="may you find many prophetic énoncés in your virtual bird guts! Here, this is not at all the becomings that are connected... so if you want to edit it like a bot yourself, it is trivial.")
|
||||||
|
|
||||||
# positional args:
|
# positional args:
|
||||||
parser.add_argument('indir', help="the directory to extract the text of all text files from, with path.")
|
parser.add_argument(
|
||||||
parser.add_argument('outfile', nargs='?', default="./mkv-dir-output.txt", help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-dir-output.txt.")
|
'indir', help="the directory to extract the text of all text files from, with path.")
|
||||||
|
parser.add_argument('outfile', nargs='?', default="./mkv-dir-output.txt",
|
||||||
|
help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-dir-output.txt.")
|
||||||
|
|
||||||
# optional args:
|
# optional args:
|
||||||
parser.add_argument('-s', '--state-size', help="the number of preceeding words the probability of the next word depends on. defaults to 2, 1 makes it more random, 3 less so.", type=int, default=2)
|
parser.add_argument(
|
||||||
parser.add_argument('-n', '--sentences', help="the number of 'sentences' to output. defaults to 5. NB: if your text has no initial caps, a 'sentence' will be a paragraph.", type=int, default=5)
|
'-s', '--state-size', help="the number of preceeding words the probability of the next word depends on. defaults to 2, 1 makes it more random, 3 less so.", type=int, default=2)
|
||||||
parser.add_argument('-l', '--length', help="set maximum number of characters per sentence.", type=int)
|
parser.add_argument(
|
||||||
|
'-n', '--sentences', help="the number of 'sentences' to output. defaults to 5. NB: if your text has no initial caps, a 'sentence' will be a paragraph.", type=int, default=5)
|
||||||
|
parser.add_argument(
|
||||||
|
'-l', '--length', help="set maximum number of characters per sentence.", type=int)
|
||||||
parser.add_argument('-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a radio between 0 and 1. lower values make it more random. defaults to 0.5", type=float, default=0.5)
|
parser.add_argument('-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a radio between 0 and 1. lower values make it more random. defaults to 0.5", type=float, default=0.5)
|
||||||
parser.add_argument('-C', '--combine-URL', help="provide a URL to be combined with the input dir")
|
parser.add_argument('-C', '--combine-URL',
|
||||||
|
help="provide a URL to be combined with the input dir")
|
||||||
parser.add_argument('-w', '--weight', help="specify the weight to be given to the second text provided with --combine. defaults to 1, and the weight of the initial text is also 1. setting this to 1.5 will place 50 percent more weight on the second text. setting it to 0.5 will place less.", type=float, default=1)
|
parser.add_argument('-w', '--weight', help="specify the weight to be given to the second text provided with --combine. defaults to 1, and the weight of the initial text is also 1. setting this to 1.5 will place 50 percent more weight on the second text. setting it to 0.5 will place less.", type=float, default=1)
|
||||||
|
|
||||||
# switches
|
# switches
|
||||||
parser.add_argument('-f', '--well-formed', help="enforce 'well_formed', doscard sentences with []{}()""'' from the markov model. use if output is filthy.", action='store_true')
|
parser.add_argument('-f', '--well-formed',
|
||||||
|
help="enforce 'well_formed', doscard sentences with []{}()""'' from the markov model. use if output is filthy.", action='store_true')
|
||||||
# store_false = default to True.
|
# store_false = default to True.
|
||||||
parser.add_argument('--newline', help="sentences in input file end with newlines rather than with full stops.", action='store_true')
|
parser.add_argument(
|
||||||
|
'--newline', help="sentences in input file end with newlines rather than with full stops.", action='store_true')
|
||||||
# store_true = default to False, become True if flagged.
|
# store_true = default to False, become True if flagged.
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
@ -55,7 +64,7 @@ args = parse_the_args()
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
#create a list of files to concatenate:
|
# create a list of files to concatenate:
|
||||||
matches = []
|
matches = []
|
||||||
if os.path.isdir(args.indir) is True:
|
if os.path.isdir(args.indir) is True:
|
||||||
for root, dirnames, filenames in os.walk(args.indir):
|
for root, dirnames, filenames in os.walk(args.indir):
|
||||||
|
@ -71,7 +80,7 @@ def main():
|
||||||
batchfile = os.path.dirname(args.indir) + os.path.sep + 'batchfile.txt'
|
batchfile = os.path.dirname(args.indir) + os.path.sep + 'batchfile.txt'
|
||||||
|
|
||||||
# concatenate files into batchfile.txt:
|
# concatenate files into batchfile.txt:
|
||||||
with open(batchfile, 'w') as outfile:
|
with open(batchfile, 'w') as outfile:
|
||||||
for fname in matches:
|
for fname in matches:
|
||||||
try:
|
try:
|
||||||
with open(fname, encoding="utf-8") as infile:
|
with open(fname, encoding="utf-8") as infile:
|
||||||
|
@ -87,7 +96,7 @@ def main():
|
||||||
if args.combine_URL:
|
if args.combine_URL:
|
||||||
html = URL(args.combine_URL)
|
html = URL(args.combine_URL)
|
||||||
ctext = convert_html(html)
|
ctext = convert_html(html)
|
||||||
|
|
||||||
# Build combo model:
|
# Build combo model:
|
||||||
# if --newline:
|
# if --newline:
|
||||||
if args.newline:
|
if args.newline:
|
||||||
|
@ -103,9 +112,11 @@ def main():
|
||||||
|
|
||||||
# write it combo!
|
# write it combo!
|
||||||
if args.length:
|
if args.length:
|
||||||
writeshortsentence(combo_model, args.sentences, args.outfile, args.overlap, args.length)
|
writeshortsentence(combo_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
else:
|
else:
|
||||||
writesentence(combo_model, args.sentences, args.outfile, args.overlap, args.length)
|
writesentence(combo_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
|
|
||||||
# no combining:
|
# no combining:
|
||||||
else:
|
else:
|
||||||
|
@ -119,10 +130,12 @@ def main():
|
||||||
|
|
||||||
# write it!
|
# write it!
|
||||||
if args.length:
|
if args.length:
|
||||||
writeshortsentence(text_model, args.sentences, args.outfile, args.overlap, args.length)
|
writeshortsentence(text_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
else:
|
else:
|
||||||
writesentence(text_model, args.sentences, args.outfile, args.overlap, args.length)
|
writesentence(text_model, args.sentences,
|
||||||
|
args.outfile, args.overlap, args.length)
|
||||||
|
|
||||||
os.unlink(batchfile)
|
os.unlink(batchfile)
|
||||||
|
|
||||||
# print('\n: The options you used are as follows:\n')
|
# print('\n: The options you used are as follows:\n')
|
||||||
|
@ -136,6 +149,7 @@ def main():
|
||||||
print(': mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
|
print(': mkv-this ran but did NOT create an output file as requested. this is a very regrettable and dangerous situation. contact the package maintainer asap. soz!')
|
||||||
|
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in New Issue