added no_well_formed option

This commit is contained in:
mouse 2020-04-21 10:45:52 -03:00
parent ea0655bbb0
commit f5619b9ef8
2 changed files with 22 additions and 21 deletions

View File

@ -21,16 +21,6 @@
"""
a (very basic) script to markovify a text file and
output a user-specified number of sentences to a text file. see --help for other options.
TODO: implement newline texts rather than full stop texts. (good for poetry no?)
it is a separate class/function in markovify, like so:
class NewlineText(Text):
# A (usable) example of subclassing markovify.Text. This one lets you markovify text where the sentences are separated by newlines instead of ". "
def sentence_split(self, text):
return re.split(r"\s*\n\s*", text)
"""
@ -47,12 +37,14 @@ def main():
parser.add_argument('outfile', nargs='?', default="./mkv-output.txt", help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-output.txt.")
# optional args:
parser.add_argument('-s', '--statesize', help="the number of preceeding words used to calculate the probability of the next word. defaults to 2, 1 makes it more random, 3 less so. must be an integer. anything more than 4 will likely have little effect.", type=int, default=2)
parser.add_argument('-s', '--state-size', help="the number of preceeding words used to calculate the probability of the next word. defaults to 2, 1 makes it more random, 3 less so. must be an integer. anything more than 4 will likely have little effect.", type=int, default=2)
# if i use --state-size (with a dash), type=int doesn't work. (NB: argparse exposes the value as args.state_size, with an underscore.)
parser.add_argument('-n', '--sentences', help="the number of 'sentences' to output. defaults to 5. must be an integer.", type=int, default=5)
parser.add_argument('-l', '--length', help="set maximum number of characters per sentence. must be an integer.", type=int)
parser.add_argument('-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a ratio between 0 and 1. defaults to 0.5", type=float, default=0.5)
parser.add_argument('--newline', help="sentences in input file end with newlines rather than with full stops.", action='store_true') # store_true means default to False, and becomes True if flagged.
parser.add_argument('-f', '--no-well-formed', help="don't enforce 'well_formed', ie allow the inclusion of sentences with []{}()""'' in them in the markov model. this might filth up your text, especially if it contains 'smart' quotes.", action='store_false') # store_false = default to True.
parser.add_argument('--newline', help="sentences in input file end with newlines rather than with full stops.", action='store_true')
# store_true = defaults to False, becomes True if flagged.
parser.add_argument('-c', '--combine', help="provide an another input text file with path to be combined with the first.")
parser.add_argument('-w', '--weight', help="specify the weight to be given to the second text provided with --combine. defaults to 1, and the weight of the initial text is also 1. setting this to 1.5 will place 50 percent more weight on the second text, while setting it to 0.5 will place less.", type=float, default=1)
@ -70,11 +62,15 @@ def main():
# build the models and build a combined model:
# NB: attempting to implement Newline option here (and below):
if args.newline :
text_model = markovify.NewlineText(text, state_size=args.statesize)
ctext_model = markovify.NewlineText(ctext, state_size=args.statesize)
text_model = markovify.NewlineText(
text, state_size=args.state_size, well_formed=args.no_well_formed)
ctext_model = markovify.NewlineText(
ctext, state_size=args.state_size, well_formed=args.no_well_formed)
else:
text_model = markovify.Text(text, state_size=args.statesize)
ctext_model = markovify.Text(ctext, state_size=args.statesize)
text_model = markovify.Text(text,
state_size=args.state_size, well_formed=args.no_well_formed)
ctext_model = markovify.Text(ctext,
state_size=args.state_size, well_formed=args.no_well_formed)
combo_model = markovify.combine([text_model, ctext_model], [1, args.weight])
@ -103,9 +99,11 @@ def main():
## implement newline option here:
if args.newline :
text_model = markovify.NewlineText(text, state_size=args.statesize)
text_model = markovify.NewlineText(text,
state_size=args.state_size, well_formed=args.no_well_formed)
else:
text_model = markovify.Text(text, state_size=args.statesize)
text_model = markovify.Text(text,
state_size=args.state_size, well_formed=args.no_well_formed)
# Print -n number of randomly-generated sentences
for i in range(args.sentences):

View File

@ -39,10 +39,11 @@ def main():
parser.add_argument('outfile', nargs='?', default="./mkv-dir-output.txt", help="the file to save to, with path. if the file is used more than once, subsequent literature will be appended to the file after a star. defaults to ./mkv-dir-output.txt.")
# optional args:
parser.add_argument('-s', '--statesize', help="the number of preceeding words the probability of the next word depends on. defaults to 2, 1 makes it more random, 3 less so.", type=int, default=2)
parser.add_argument('-s', '--state-size', help="the number of preceeding words the probability of the next word depends on. defaults to 2, 1 makes it more random, 3 less so.", type=int, default=2)
parser.add_argument('-n', '--sentences', help="the number of 'sentences' to output. defaults to 5.", type=int, default=5)
parser.add_argument('-l', '--length', help="set maximum number of characters per sentence.", type=int)
parser.add_argument('-o', '--overlap', help="the amount of overlap allowed between original text and the output, expressed as a radio between 0 and 1. lower values make it more random. defaults to 0.5", type=float, default=0.5)
parser.add_argument('-f', '--no-well-formed', help="don't enforce 'well_formed', ie allow the inclusion of sentences with []{}()""'' in them in the markov model. this might filth up your text, especially if it contains 'smart' quotes.", action='store_false') # store_false = default to True.
parser.add_argument('--newline', help="sentences in input file end with newlines rather than with full stops.", action='store_true')
parser.add_argument('-c', '--combine', help="provide an another input text file with path to be combined with the first.")
parser.add_argument('-w', '--weight', help="specify the weight to be given to the second text provided with --combine. defaults to 1, and the weight of the initial text is also 1. setting this to 1.5 will place 50 percent more weight on the second text. setting it to 0.5 will place less.", type=float, default=1)
@ -73,9 +74,11 @@ def main():
# Build the model:
if args.newline :
text_model = markovify.NewlineText(text, state_size=args.statesize)
text_model = markovify.NewlineText(text,
state_size=args.state_size, well_formed=args.no_well_formed)
else:
text_model = markovify.Text(text, state_size=args.statesize)
text_model = markovify.Text(text,
state_size=args.state_size, well_formed=args.no_well_formed)
# Print -n number of randomly-generated sentences
for i in range(args.sentences):