mirror of https://github.com/pypa/pip
Upgrade pyparsing to 3.0.7
This commit is contained in:
parent
5c565fc786
commit
5b14995b85
|
@ -0,0 +1 @@
|
||||||
|
Upgrade pyparsing to 3.0.7
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,328 @@
|
||||||
|
# module pyparsing.py
|
||||||
|
#
|
||||||
|
# Copyright (c) 2003-2021 Paul T. McGuire
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
# a copy of this software and associated documentation files (the
|
||||||
|
# "Software"), to deal in the Software without restriction, including
|
||||||
|
# without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
# permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
# the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be
|
||||||
|
# included in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
#
|
||||||
|
|
||||||
|
__doc__ = """
|
||||||
|
pyparsing module - Classes and methods to define and execute parsing grammars
|
||||||
|
=============================================================================
|
||||||
|
|
||||||
|
The pyparsing module is an alternative approach to creating and
|
||||||
|
executing simple grammars, vs. the traditional lex/yacc approach, or the
|
||||||
|
use of regular expressions. With pyparsing, you don't need to learn
|
||||||
|
a new syntax for defining grammars or matching expressions - the parsing
|
||||||
|
module provides a library of classes that you use to construct the
|
||||||
|
grammar directly in Python.
|
||||||
|
|
||||||
|
Here is a program to parse "Hello, World!" (or any greeting of the form
|
||||||
|
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
|
||||||
|
:class:`Literal`, and :class:`And` elements
|
||||||
|
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
|
||||||
|
and the strings are auto-converted to :class:`Literal` expressions)::
|
||||||
|
|
||||||
|
from pip._vendor.pyparsing import Word, alphas
|
||||||
|
|
||||||
|
# define grammar of a greeting
|
||||||
|
greet = Word(alphas) + "," + Word(alphas) + "!"
|
||||||
|
|
||||||
|
hello = "Hello, World!"
|
||||||
|
print(hello, "->", greet.parse_string(hello))
|
||||||
|
|
||||||
|
The program outputs the following::
|
||||||
|
|
||||||
|
Hello, World! -> ['Hello', ',', 'World', '!']
|
||||||
|
|
||||||
|
The Python representation of the grammar is quite readable, owing to the
|
||||||
|
self-explanatory class names, and the use of :class:`'+'<And>`,
|
||||||
|
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
|
||||||
|
|
||||||
|
The :class:`ParseResults` object returned from
|
||||||
|
:class:`ParserElement.parseString` can be
|
||||||
|
accessed as a nested list, a dictionary, or an object with named
|
||||||
|
attributes.
|
||||||
|
|
||||||
|
The pyparsing module handles some of the problems that are typically
|
||||||
|
vexing when writing text parsers:
|
||||||
|
|
||||||
|
- extra or missing whitespace (the above program will also handle
|
||||||
|
"Hello,World!", "Hello , World !", etc.)
|
||||||
|
- quoted strings
|
||||||
|
- embedded comments
|
||||||
|
|
||||||
|
|
||||||
|
Getting Started -
|
||||||
|
-----------------
|
||||||
|
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
|
||||||
|
see the base classes that most other pyparsing
|
||||||
|
classes inherit from. Use the docstrings for examples of how to:
|
||||||
|
|
||||||
|
- construct literal match expressions from :class:`Literal` and
|
||||||
|
:class:`CaselessLiteral` classes
|
||||||
|
- construct character word-group expressions using the :class:`Word`
|
||||||
|
class
|
||||||
|
- see how to create repetitive expressions using :class:`ZeroOrMore`
|
||||||
|
and :class:`OneOrMore` classes
|
||||||
|
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
|
||||||
|
and :class:`'&'<Each>` operators to combine simple expressions into
|
||||||
|
more complex ones
|
||||||
|
- associate names with your parsed results using
|
||||||
|
:class:`ParserElement.setResultsName`
|
||||||
|
- access the parsed data, which is returned as a :class:`ParseResults`
|
||||||
|
object
|
||||||
|
- find some helpful expression short-cuts like :class:`delimitedList`
|
||||||
|
and :class:`oneOf`
|
||||||
|
- find more useful common expressions in the :class:`pyparsing_common`
|
||||||
|
namespace class
|
||||||
|
"""
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
|
||||||
|
class version_info(NamedTuple):
    """Structured release version for the module, with PEP 440-style rendering."""

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self):
        # Base "major.minor.micro"; non-final releases get a suffix such
        # as "rc1" (release candidate) or "b2" (beta) appended.
        base = "{}.{}.{}".format(self.major, self.minor, self.micro)
        if self.releaselevel == "final":
            return base
        level_char = self.releaselevel[0]
        # "candidate" is conventionally rendered as "rc", so prepend "r"
        prefix = "r" if level_char == "c" else ""
        return base + "{}{}{}".format(prefix, level_char, self.serial)

    def __str__(self):
        return "{} {} / {}".format(__name__, self.__version__, __version_time__)

    def __repr__(self):
        field_desc = ", ".join(
            "{}={!r}".format(*nv) for nv in zip(self._fields, self)
        )
        return "{}.{}({})".format(__name__, type(self).__name__, field_desc)
|
||||||
|
|
||||||
|
|
||||||
|
__version_info__ = version_info(3, 0, 7, "final", 0)
|
||||||
|
__version_time__ = "15 Jan 2022 04:10 UTC"
|
||||||
|
__version__ = __version_info__.__version__
|
||||||
|
__versionTime__ = __version_time__
|
||||||
|
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
|
||||||
|
|
||||||
|
from .util import *
|
||||||
|
from .exceptions import *
|
||||||
|
from .actions import *
|
||||||
|
from .core import __diag__, __compat__
|
||||||
|
from .results import *
|
||||||
|
from .core import *
|
||||||
|
from .core import _builtin_exprs as core_builtin_exprs
|
||||||
|
from .helpers import *
|
||||||
|
from .helpers import _builtin_exprs as helper_builtin_exprs
|
||||||
|
|
||||||
|
from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
|
||||||
|
from .testing import pyparsing_test as testing
|
||||||
|
from .common import (
|
||||||
|
pyparsing_common as common,
|
||||||
|
_builtin_exprs as common_builtin_exprs,
|
||||||
|
)
|
||||||
|
|
||||||
|
# define backward compat synonyms
|
||||||
|
if "pyparsing_unicode" not in globals():
|
||||||
|
pyparsing_unicode = unicode
|
||||||
|
if "pyparsing_common" not in globals():
|
||||||
|
pyparsing_common = common
|
||||||
|
if "pyparsing_test" not in globals():
|
||||||
|
pyparsing_test = testing
|
||||||
|
|
||||||
|
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"__version__",
|
||||||
|
"__version_time__",
|
||||||
|
"__author__",
|
||||||
|
"__compat__",
|
||||||
|
"__diag__",
|
||||||
|
"And",
|
||||||
|
"AtLineStart",
|
||||||
|
"AtStringStart",
|
||||||
|
"CaselessKeyword",
|
||||||
|
"CaselessLiteral",
|
||||||
|
"CharsNotIn",
|
||||||
|
"Combine",
|
||||||
|
"Dict",
|
||||||
|
"Each",
|
||||||
|
"Empty",
|
||||||
|
"FollowedBy",
|
||||||
|
"Forward",
|
||||||
|
"GoToColumn",
|
||||||
|
"Group",
|
||||||
|
"IndentedBlock",
|
||||||
|
"Keyword",
|
||||||
|
"LineEnd",
|
||||||
|
"LineStart",
|
||||||
|
"Literal",
|
||||||
|
"Located",
|
||||||
|
"PrecededBy",
|
||||||
|
"MatchFirst",
|
||||||
|
"NoMatch",
|
||||||
|
"NotAny",
|
||||||
|
"OneOrMore",
|
||||||
|
"OnlyOnce",
|
||||||
|
"OpAssoc",
|
||||||
|
"Opt",
|
||||||
|
"Optional",
|
||||||
|
"Or",
|
||||||
|
"ParseBaseException",
|
||||||
|
"ParseElementEnhance",
|
||||||
|
"ParseException",
|
||||||
|
"ParseExpression",
|
||||||
|
"ParseFatalException",
|
||||||
|
"ParseResults",
|
||||||
|
"ParseSyntaxException",
|
||||||
|
"ParserElement",
|
||||||
|
"PositionToken",
|
||||||
|
"QuotedString",
|
||||||
|
"RecursiveGrammarException",
|
||||||
|
"Regex",
|
||||||
|
"SkipTo",
|
||||||
|
"StringEnd",
|
||||||
|
"StringStart",
|
||||||
|
"Suppress",
|
||||||
|
"Token",
|
||||||
|
"TokenConverter",
|
||||||
|
"White",
|
||||||
|
"Word",
|
||||||
|
"WordEnd",
|
||||||
|
"WordStart",
|
||||||
|
"ZeroOrMore",
|
||||||
|
"Char",
|
||||||
|
"alphanums",
|
||||||
|
"alphas",
|
||||||
|
"alphas8bit",
|
||||||
|
"any_close_tag",
|
||||||
|
"any_open_tag",
|
||||||
|
"c_style_comment",
|
||||||
|
"col",
|
||||||
|
"common_html_entity",
|
||||||
|
"counted_array",
|
||||||
|
"cpp_style_comment",
|
||||||
|
"dbl_quoted_string",
|
||||||
|
"dbl_slash_comment",
|
||||||
|
"delimited_list",
|
||||||
|
"dict_of",
|
||||||
|
"empty",
|
||||||
|
"hexnums",
|
||||||
|
"html_comment",
|
||||||
|
"identchars",
|
||||||
|
"identbodychars",
|
||||||
|
"java_style_comment",
|
||||||
|
"line",
|
||||||
|
"line_end",
|
||||||
|
"line_start",
|
||||||
|
"lineno",
|
||||||
|
"make_html_tags",
|
||||||
|
"make_xml_tags",
|
||||||
|
"match_only_at_col",
|
||||||
|
"match_previous_expr",
|
||||||
|
"match_previous_literal",
|
||||||
|
"nested_expr",
|
||||||
|
"null_debug_action",
|
||||||
|
"nums",
|
||||||
|
"one_of",
|
||||||
|
"printables",
|
||||||
|
"punc8bit",
|
||||||
|
"python_style_comment",
|
||||||
|
"quoted_string",
|
||||||
|
"remove_quotes",
|
||||||
|
"replace_with",
|
||||||
|
"replace_html_entity",
|
||||||
|
"rest_of_line",
|
||||||
|
"sgl_quoted_string",
|
||||||
|
"srange",
|
||||||
|
"string_end",
|
||||||
|
"string_start",
|
||||||
|
"trace_parse_action",
|
||||||
|
"unicode_string",
|
||||||
|
"with_attribute",
|
||||||
|
"indentedBlock",
|
||||||
|
"original_text_for",
|
||||||
|
"ungroup",
|
||||||
|
"infix_notation",
|
||||||
|
"locatedExpr",
|
||||||
|
"with_class",
|
||||||
|
"CloseMatch",
|
||||||
|
"token_map",
|
||||||
|
"pyparsing_common",
|
||||||
|
"pyparsing_unicode",
|
||||||
|
"unicode_set",
|
||||||
|
"condition_as_parse_action",
|
||||||
|
"pyparsing_test",
|
||||||
|
# pre-PEP8 compatibility names
|
||||||
|
"__versionTime__",
|
||||||
|
"anyCloseTag",
|
||||||
|
"anyOpenTag",
|
||||||
|
"cStyleComment",
|
||||||
|
"commonHTMLEntity",
|
||||||
|
"countedArray",
|
||||||
|
"cppStyleComment",
|
||||||
|
"dblQuotedString",
|
||||||
|
"dblSlashComment",
|
||||||
|
"delimitedList",
|
||||||
|
"dictOf",
|
||||||
|
"htmlComment",
|
||||||
|
"javaStyleComment",
|
||||||
|
"lineEnd",
|
||||||
|
"lineStart",
|
||||||
|
"makeHTMLTags",
|
||||||
|
"makeXMLTags",
|
||||||
|
"matchOnlyAtCol",
|
||||||
|
"matchPreviousExpr",
|
||||||
|
"matchPreviousLiteral",
|
||||||
|
"nestedExpr",
|
||||||
|
"nullDebugAction",
|
||||||
|
"oneOf",
|
||||||
|
"opAssoc",
|
||||||
|
"pythonStyleComment",
|
||||||
|
"quotedString",
|
||||||
|
"removeQuotes",
|
||||||
|
"replaceHTMLEntity",
|
||||||
|
"replaceWith",
|
||||||
|
"restOfLine",
|
||||||
|
"sglQuotedString",
|
||||||
|
"stringEnd",
|
||||||
|
"stringStart",
|
||||||
|
"traceParseAction",
|
||||||
|
"unicodeString",
|
||||||
|
"withAttribute",
|
||||||
|
"indentedBlock",
|
||||||
|
"originalTextFor",
|
||||||
|
"infixNotation",
|
||||||
|
"locatedExpr",
|
||||||
|
"withClass",
|
||||||
|
"tokenMap",
|
||||||
|
"conditionAsParseAction",
|
||||||
|
"autoname_elements",
|
||||||
|
]
|
|
@ -0,0 +1,207 @@
|
||||||
|
# actions.py
|
||||||
|
|
||||||
|
from .exceptions import ParseException
|
||||||
|
from .util import col
|
||||||
|
|
||||||
|
|
||||||
|
class OnlyOnce:
    """
    Wrapper for parse actions, to ensure they are only called once.
    """

    def __init__(self, method_call):
        from .core import _trim_arity

        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s, l, t):
        # Guard clause: a second invocation without reset() is an error.
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
        results = self.callable(s, l, t)
        self.called = True
        return results

    def reset(self):
        """
        Allow the associated parse action to be called once more.
        """
        self.called = False
|
||||||
|
|
||||||
|
|
||||||
|
def match_only_at_col(n):
    """
    Create a parse action that fails unless the match starts at
    column *n* of the input text.
    """

    def _check_column(strg, locn, toks):
        actual = col(locn, strg)
        if actual != n:
            raise ParseException(
                strg, locn, "matched token not at column {}".format(n)
            )

    return _check_column
|
||||||
|
|
||||||
|
|
||||||
|
def replace_with(repl_str):
    """
    Build a parse action that discards the matched text and returns
    ``[repl_str]`` instead. Especially useful when used with
    :class:`transform_string<ParserElement.transform_string>` ().

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        OneOrMore(term).parse_string("324 234 N/A 234")  # -> [324, 234, nan, 234]
    """

    def _substitute(s, l, t):
        return [repl_str]

    return _substitute
|
||||||
|
|
||||||
|
|
||||||
|
def remove_quotes(s, l, t):
    """
    Parse action that strips the first and last characters (the
    quotation marks) from a parsed quoted string.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
|
||||||
|
|
||||||
|
|
||||||
|
def with_attribute(*args, **attr_dict):
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or :class:`make_html_tags`.
    Use ``with_attribute`` to qualify a starting tag with a required
    attribute value, to avoid false matches on common tags such as
    ``<TD>`` or ``<DIV>``.

    The attributes to test may be given as:

    - keyword arguments, as in ``(align="right")``, or
    - an explicit dict with the ``**`` operator, when an attribute
      name is also a Python reserved word, as in
      ``**{"class": "Customer", "align": "right"}``
    - a list of name-value tuples, as in
      ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Attribute names with a namespace prefix require the tuple form,
    since a prefixed name is not a valid Python keyword.

    To verify only that an attribute exists, regardless of its value,
    pass ``with_attribute.ANY_VALUE`` as the value. If just testing
    for ``class`` (with or without a namespace), use :class:`with_class`.

    Example::

        html = '''
          <div>
          Some text
          <div type="grid">1 4 0 1 0</div>
          <div type="graph">1,3 2,3 1,1</div>
          <div>this has no type</div>
          </div>
        '''
        div, div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # match any div tag having a type attribute, regardless of value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Normalize both calling conventions to a list of (name, value) pairs.
    if args:
        raw_attrs = args[:]
    else:
        raw_attrs = attr_dict.items()
    attrs = [(k, v) for k, v in raw_attrs]

    def pa(s, l, tokens):
        for attr_name, attr_value in attrs:
            if attr_name not in tokens:
                raise ParseException(s, l, "no matching attribute " + attr_name)
            if attr_value != with_attribute.ANY_VALUE and tokens[attr_name] != attr_value:
                raise ParseException(
                    s,
                    l,
                    "attribute {!r} has value {!r}, must be {!r}".format(
                        attr_name, tokens[attr_name], attr_value
                    ),
                )

    return pa


# Sentinel: matches any attribute value, requiring only that the attribute exist.
with_attribute.ANY_VALUE = object()
|
||||||
|
|
||||||
|
|
||||||
|
def with_class(classname, namespace=""):
    """
    Simplified version of :class:`with_attribute` for matching on a
    tag's ``class`` attribute - made awkward otherwise because
    ``class`` is a reserved word in Python.

    Example::

        html = '''
          <div>
          Some text
          <div class="grid">1 4 0 1 0</div>
          <div class="graph">1,3 2,3 1,1</div>
          <div>this <div> has no class</div>
          </div>
        '''
        div, div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # A namespace prefix turns the attribute name into "<ns>:class".
    if namespace:
        classattr = "{}:class".format(namespace)
    else:
        classattr = "class"
    return with_attribute(**{classattr: classname})
|
||||||
|
|
||||||
|
|
||||||
|
# pre-PEP8 compatibility symbols -- keep the historical camelCase names
# importable for code written against pyparsing 2.x
replaceWith = replace_with
removeQuotes = remove_quotes
withAttribute = with_attribute
withClass = with_class
matchOnlyAtCol = match_only_at_col
|
|
@ -0,0 +1,424 @@
|
||||||
|
# common.py
|
||||||
|
from .core import *
|
||||||
|
from .helpers import delimited_list, any_open_tag, any_close_tag
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
# some other useful expressions - using lower-case class name since we are really using this as a namespace
|
||||||
|
class pyparsing_common:
|
||||||
|
"""Here are some common low-level expressions that may be useful in
|
||||||
|
jump-starting parser development:
|
||||||
|
|
||||||
|
- numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
|
||||||
|
:class:`scientific notation<sci_real>`)
|
||||||
|
- common :class:`programming identifiers<identifier>`
|
||||||
|
- network addresses (:class:`MAC<mac_address>`,
|
||||||
|
:class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
|
||||||
|
- ISO8601 :class:`dates<iso8601_date>` and
|
||||||
|
:class:`datetime<iso8601_datetime>`
|
||||||
|
- :class:`UUID<uuid>`
|
||||||
|
- :class:`comma-separated list<comma_separated_list>`
|
||||||
|
- :class:`url`
|
||||||
|
|
||||||
|
Parse actions:
|
||||||
|
|
||||||
|
- :class:`convertToInteger`
|
||||||
|
- :class:`convertToFloat`
|
||||||
|
- :class:`convertToDate`
|
||||||
|
- :class:`convertToDatetime`
|
||||||
|
- :class:`stripHTMLTags`
|
||||||
|
- :class:`upcaseTokens`
|
||||||
|
- :class:`downcaseTokens`
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
pyparsing_common.number.runTests('''
|
||||||
|
# any int or real number, returned as the appropriate type
|
||||||
|
100
|
||||||
|
-100
|
||||||
|
+100
|
||||||
|
3.14159
|
||||||
|
6.02e23
|
||||||
|
1e-12
|
||||||
|
''')
|
||||||
|
|
||||||
|
pyparsing_common.fnumber.runTests('''
|
||||||
|
# any int or real number, returned as float
|
||||||
|
100
|
||||||
|
-100
|
||||||
|
+100
|
||||||
|
3.14159
|
||||||
|
6.02e23
|
||||||
|
1e-12
|
||||||
|
''')
|
||||||
|
|
||||||
|
pyparsing_common.hex_integer.runTests('''
|
||||||
|
# hex numbers
|
||||||
|
100
|
||||||
|
FF
|
||||||
|
''')
|
||||||
|
|
||||||
|
pyparsing_common.fraction.runTests('''
|
||||||
|
# fractions
|
||||||
|
1/2
|
||||||
|
-3/4
|
||||||
|
''')
|
||||||
|
|
||||||
|
pyparsing_common.mixed_integer.runTests('''
|
||||||
|
# mixed fractions
|
||||||
|
1
|
||||||
|
1/2
|
||||||
|
-3/4
|
||||||
|
1-3/4
|
||||||
|
''')
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
|
||||||
|
pyparsing_common.uuid.runTests('''
|
||||||
|
# uuid
|
||||||
|
12345678-1234-5678-1234-567812345678
|
||||||
|
''')
|
||||||
|
|
||||||
|
prints::
|
||||||
|
|
||||||
|
# any int or real number, returned as the appropriate type
|
||||||
|
100
|
||||||
|
[100]
|
||||||
|
|
||||||
|
-100
|
||||||
|
[-100]
|
||||||
|
|
||||||
|
+100
|
||||||
|
[100]
|
||||||
|
|
||||||
|
3.14159
|
||||||
|
[3.14159]
|
||||||
|
|
||||||
|
6.02e23
|
||||||
|
[6.02e+23]
|
||||||
|
|
||||||
|
1e-12
|
||||||
|
[1e-12]
|
||||||
|
|
||||||
|
# any int or real number, returned as float
|
||||||
|
100
|
||||||
|
[100.0]
|
||||||
|
|
||||||
|
-100
|
||||||
|
[-100.0]
|
||||||
|
|
||||||
|
+100
|
||||||
|
[100.0]
|
||||||
|
|
||||||
|
3.14159
|
||||||
|
[3.14159]
|
||||||
|
|
||||||
|
6.02e23
|
||||||
|
[6.02e+23]
|
||||||
|
|
||||||
|
1e-12
|
||||||
|
[1e-12]
|
||||||
|
|
||||||
|
# hex numbers
|
||||||
|
100
|
||||||
|
[256]
|
||||||
|
|
||||||
|
FF
|
||||||
|
[255]
|
||||||
|
|
||||||
|
# fractions
|
||||||
|
1/2
|
||||||
|
[0.5]
|
||||||
|
|
||||||
|
-3/4
|
||||||
|
[-0.75]
|
||||||
|
|
||||||
|
# mixed fractions
|
||||||
|
1
|
||||||
|
[1]
|
||||||
|
|
||||||
|
1/2
|
||||||
|
[0.5]
|
||||||
|
|
||||||
|
-3/4
|
||||||
|
[-0.75]
|
||||||
|
|
||||||
|
1-3/4
|
||||||
|
[1.75]
|
||||||
|
|
||||||
|
# uuid
|
||||||
|
12345678-1234-5678-1234-567812345678
|
||||||
|
[UUID('12345678-1234-5678-1234-567812345678')]
|
||||||
|
"""
|
||||||
|
|
||||||
|
convert_to_integer = token_map(int)
|
||||||
|
"""
|
||||||
|
Parse action for converting parsed integers to Python int
|
||||||
|
"""
|
||||||
|
|
||||||
|
convert_to_float = token_map(float)
|
||||||
|
"""
|
||||||
|
Parse action for converting parsed numbers to Python float
|
||||||
|
"""
|
||||||
|
|
||||||
|
integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
|
||||||
|
"""expression that parses an unsigned integer, returns an int"""
|
||||||
|
|
||||||
|
hex_integer = (
|
||||||
|
Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
|
||||||
|
)
|
||||||
|
"""expression that parses a hexadecimal integer, returns an int"""
|
||||||
|
|
||||||
|
signed_integer = (
|
||||||
|
Regex(r"[+-]?\d+")
|
||||||
|
.set_name("signed integer")
|
||||||
|
.set_parse_action(convert_to_integer)
|
||||||
|
)
|
||||||
|
"""expression that parses an integer with optional leading sign, returns an int"""
|
||||||
|
|
||||||
|
fraction = (
|
||||||
|
signed_integer().set_parse_action(convert_to_float)
|
||||||
|
+ "/"
|
||||||
|
+ signed_integer().set_parse_action(convert_to_float)
|
||||||
|
).set_name("fraction")
|
||||||
|
"""fractional expression of an integer divided by an integer, returns a float"""
|
||||||
|
fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
|
||||||
|
|
||||||
|
mixed_integer = (
|
||||||
|
fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
|
||||||
|
).set_name("fraction or mixed integer-fraction")
|
||||||
|
"""mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
|
||||||
|
mixed_integer.add_parse_action(sum)
|
||||||
|
|
||||||
|
real = (
|
||||||
|
Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
|
||||||
|
.set_name("real number")
|
||||||
|
.set_parse_action(convert_to_float)
|
||||||
|
)
|
||||||
|
"""expression that parses a floating point number and returns a float"""
|
||||||
|
|
||||||
|
sci_real = (
|
||||||
|
Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
|
||||||
|
.set_name("real number with scientific notation")
|
||||||
|
.set_parse_action(convert_to_float)
|
||||||
|
)
|
||||||
|
"""expression that parses a floating point number with optional
|
||||||
|
scientific notation and returns a float"""
|
||||||
|
|
||||||
|
# streamlining this expression makes the docs nicer-looking
|
||||||
|
number = (sci_real | real | signed_integer).setName("number").streamline()
|
||||||
|
"""any numeric expression, returns the corresponding Python type"""
|
||||||
|
|
||||||
|
fnumber = (
|
||||||
|
Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
|
||||||
|
.set_name("fnumber")
|
||||||
|
.set_parse_action(convert_to_float)
|
||||||
|
)
|
||||||
|
"""any int or real number, returned as float"""
|
||||||
|
|
||||||
|
identifier = Word(identchars, identbodychars).set_name("identifier")
|
||||||
|
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
|
||||||
|
|
||||||
|
ipv4_address = Regex(
|
||||||
|
r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
|
||||||
|
).set_name("IPv4 address")
|
||||||
|
"IPv4 address (``0.0.0.0 - 255.255.255.255``)"
|
||||||
|
|
||||||
|
_ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
|
||||||
|
_full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
|
||||||
|
"full IPv6 address"
|
||||||
|
)
|
||||||
|
_short_ipv6_address = (
|
||||||
|
Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
|
||||||
|
+ "::"
|
||||||
|
+ Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
|
||||||
|
).set_name("short IPv6 address")
|
||||||
|
_short_ipv6_address.add_condition(
|
||||||
|
lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
|
||||||
|
)
|
||||||
|
_mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
|
||||||
|
ipv6_address = Combine(
|
||||||
|
(_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
|
||||||
|
"IPv6 address"
|
||||||
|
)
|
||||||
|
).set_name("IPv6 address")
|
||||||
|
"IPv6 address (long, short, or mixed form)"
|
||||||
|
|
||||||
|
mac_address = Regex(
|
||||||
|
r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
|
||||||
|
).set_name("MAC address")
|
||||||
|
"MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def convert_to_date(fmt: str = "%Y-%m-%d"):
|
||||||
|
"""
|
||||||
|
Helper to create a parse action for converting parsed date string to Python datetime.date
|
||||||
|
|
||||||
|
Params -
|
||||||
|
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
date_expr = pyparsing_common.iso8601_date.copy()
|
||||||
|
date_expr.setParseAction(pyparsing_common.convertToDate())
|
||||||
|
print(date_expr.parseString("1999-12-31"))
|
||||||
|
|
||||||
|
prints::
|
||||||
|
|
||||||
|
[datetime.date(1999, 12, 31)]
|
||||||
|
"""
|
||||||
|
|
||||||
|
def cvt_fn(ss, ll, tt):
|
||||||
|
try:
|
||||||
|
return datetime.strptime(tt[0], fmt).date()
|
||||||
|
except ValueError as ve:
|
||||||
|
raise ParseException(ss, ll, str(ve))
|
||||||
|
|
||||||
|
return cvt_fn
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
|
||||||
|
"""Helper to create a parse action for converting parsed
|
||||||
|
datetime string to Python datetime.datetime
|
||||||
|
|
||||||
|
Params -
|
||||||
|
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
dt_expr = pyparsing_common.iso8601_datetime.copy()
|
||||||
|
dt_expr.setParseAction(pyparsing_common.convertToDatetime())
|
||||||
|
print(dt_expr.parseString("1999-12-31T23:59:59.999"))
|
||||||
|
|
||||||
|
prints::
|
||||||
|
|
||||||
|
[datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
|
||||||
|
"""
|
||||||
|
|
||||||
|
def cvt_fn(s, l, t):
|
||||||
|
try:
|
||||||
|
return datetime.strptime(t[0], fmt)
|
||||||
|
except ValueError as ve:
|
||||||
|
raise ParseException(s, l, str(ve))
|
||||||
|
|
||||||
|
return cvt_fn
|
||||||
|
|
||||||
|
iso8601_date = Regex(
|
||||||
|
r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
|
||||||
|
).set_name("ISO8601 date")
|
||||||
|
"ISO8601 date (``yyyy-mm-dd``)"
|
||||||
|
|
||||||
|
iso8601_datetime = Regex(
|
||||||
|
r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
|
||||||
|
).set_name("ISO8601 datetime")
|
||||||
|
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
|
||||||
|
|
||||||
|
uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
|
||||||
|
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
|
||||||
|
|
||||||
|
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
|
||||||
|
|
||||||
|
@staticmethod
def strip_html_tags(s: str, l: int, tokens: ParseResults):
    """Parse action to remove HTML tags from web page HTML source

    Example::

        # strip HTML links from normal text
        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        td, td_end = makeHTMLTags("TD")
        table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
        print(table_text.parseString(text).body)

    Prints::

        More info at the pyparsing wiki page
    """
    # Run the shared tag-suppressing transformer over the matched text.
    stripped = pyparsing_common._html_stripper.transform_string(tokens[0])
    return stripped
|
||||||
|
|
||||||
|
# A single unquoted item in a comma-separated list: one or more printable
# "words" (commas excluded) with optional interior single-space/tab runs, as
# long as the whitespace is not immediately followed by a comma or
# end-of-line (so trailing whitespace is not absorbed into the item).
_commasepitem = (
    Combine(
        OneOrMore(
            ~Literal(",")
            + ~LineEnd()
            + Word(printables, exclude_chars=",")
            + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
        )
    )
    .streamline()
    .set_name("commaItem")
)
# quoted strings take precedence over bare items; empty entries default to ""
comma_separated_list = delimited_list(
    Opt(quoted_string.copy() | _commasepitem, default="")
).set_name("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
"""Parse action to convert tokens to upper case."""

downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
"""Parse action to convert tokens to lower case."""
|
||||||
|
|
||||||
|
# fmt: off
# URL matcher assembled from adjacent raw-string fragments; named groups
# (scheme, auth, host, port, path, query, fragment) expose each URL part.
url = Regex(
    # https://mathiasbynens.be/demo/url-regex
    # https://gist.github.com/dperini/729294
    r"^" +
    # protocol identifier (optional)
    # short syntax // still required
    r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
    # user:pass BasicAuth (optional)
    r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
    r"(?P<host>" +
    # IP address exclusion
    # private & local networks
    r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
    r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
    r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
    # IP address dotted notation octets
    # excludes loopback network 0.0.0.0
    # excludes reserved space >= 224.0.0.0
    # excludes network & broadcast addresses
    # (first & last IP address of each class)
    r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
    r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
    r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
    r"|" +
    # host & domain names, may end with dot
    # can be replaced by a shortest alternative
    # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
    r"(?:" +
    r"(?:" +
    r"[a-z0-9\u00a1-\uffff]" +
    r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
    r")?" +
    r"[a-z0-9\u00a1-\uffff]\." +
    r")+" +
    # TLD identifier name, may end with dot
    r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
    r")" +
    # port number (optional)
    r"(:(?P<port>\d{2,5}))?" +
    # resource path (optional)
    r"(?P<path>\/[^?# ]*)?" +
    # query string (optional)
    r"(\?(?P<query>[^#]*))?" +
    # fragment (optional)
    r"(#(?P<fragment>\S*))?" +
    r"$"
).set_name("url")
# fmt: on
|
||||||
|
|
||||||
|
# pre-PEP8 compatibility names
# Kept so code written against pyparsing's older camelCase API keeps working;
# each alias refers to the snake_case implementation above.
convertToInteger = convert_to_integer
convertToFloat = convert_to_float
convertToDate = convert_to_date
convertToDatetime = convert_to_datetime
stripHTMLTags = strip_html_tags
upcaseTokens = upcase_tokens
downcaseTokens = downcase_tokens
|
||||||
|
|
||||||
|
|
||||||
|
# Collect every ParserElement attribute of pyparsing_common so they can be
# treated uniformly (e.g. bulk-enabling packrat or left-recursion support).
_builtin_exprs = [
    v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
]
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,593 @@
|
||||||
|
import railroad
|
||||||
|
from pip._vendor import pyparsing
|
||||||
|
from pip._vendor.pkg_resources import resource_filename
|
||||||
|
from typing import (
|
||||||
|
List,
|
||||||
|
Optional,
|
||||||
|
NamedTuple,
|
||||||
|
Generic,
|
||||||
|
TypeVar,
|
||||||
|
Dict,
|
||||||
|
Callable,
|
||||||
|
Set,
|
||||||
|
Iterable,
|
||||||
|
)
|
||||||
|
from jinja2 import Template
|
||||||
|
from io import StringIO
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
# Load the Jinja2 HTML template shipped alongside this module; read once at
# import time and reused for every railroad_to_html() call.
with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
    template = Template(fp.read())
|
||||||
|
|
||||||
|
# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
# name: diagram title; diagram: the railroad item (may be None for
# un-renderable entries); index: ordering key used when sorting output
NamedDiagram = NamedTuple(
    "NamedDiagram",
    [("name", str), ("diagram", Optional[railroad.DiagramItem]), ("index", int)],
)
"""
A simple structure for associating a name with a railroad diagram
"""

# generic parameter used by EditablePartial and resolve_partial below
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
class EachItem(railroad.Group):
    """
    Custom railroad item to compose a:
    - Group containing a
    - OneOrMore containing a
    - Choice of the elements in the Each
    with the group label indicating that all must be matched
    """

    all_label = "[ALL]"

    def __init__(self, *items):
        # Default Choice index points at the last alternative.
        alternatives = railroad.Choice(len(items) - 1, *items)
        repeated = railroad.OneOrMore(item=alternatives)
        super().__init__(repeated, label=self.all_label)
|
||||||
|
|
||||||
|
|
||||||
|
class AnnotatedItem(railroad.Group):
    """
    Simple subclass of Group that creates an annotation label
    """

    def __init__(self, label: str, item):
        # Render the annotation in square brackets, e.g. "NOT" -> "[NOT]".
        bracketed = "[{}]".format(label)
        super().__init__(item=item, label=bracketed)
|
||||||
|
|
||||||
|
|
||||||
|
class EditablePartial(Generic[T]):
    """
    Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
    constructed.
    """

    # We need this here because the railroad constructors actually transform the data, so can't be called until the
    # entire tree is assembled

    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    @classmethod
    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
        """
        If you call this function in the same way that you would call the constructor, it will store the arguments
        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
        """
        return EditablePartial(func=func, args=list(args), kwargs=kwargs)

    @property
    def name(self):
        # Convenience accessor for the stored "name" keyword argument.
        return self.kwargs["name"]

    def __call__(self) -> T:
        """
        Evaluate the partial and return the result
        """
        positional = self.args.copy()
        keyword = self.kwargs.copy()

        # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
        # args=['list', 'of', 'things'])
        spec = inspect.getfullargspec(self.func)
        if spec.varargs in self.kwargs:
            positional += keyword.pop(spec.varargs)

        return self.func(*positional, **keyword)
|
||||||
|
|
||||||
|
|
||||||
|
def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
    """
    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
    :params kwargs: kwargs to be passed in to the template
    """
    data = []
    for diagram in diagrams:
        # NamedDiagram.diagram is declared Optional; skip entries with no
        # renderable diagram instead of crashing on writeSvg(None).
        if diagram.diagram is None:
            continue
        io = StringIO()
        diagram.diagram.writeSvg(io.write)
        title = diagram.name
        if diagram.index == 0:
            title += " (root)"
        data.append({"title": title, "text": "", "svg": io.getvalue()})

    return template.render(diagrams=data, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_partial(partial: "EditablePartial[T]") -> T:
    """
    Recursively resolves a collection of Partials into whatever type they are
    """
    # Containers first: resolve each entry, preserving the container shape.
    if isinstance(partial, list):
        return [resolve_partial(entry) for entry in partial]
    if isinstance(partial, dict):
        return {k: resolve_partial(v) for k, v in partial.items()}
    # A partial itself: resolve its stored arguments in place, then call it.
    if isinstance(partial, EditablePartial):
        partial.args = resolve_partial(partial.args)
        partial.kwargs = resolve_partial(partial.kwargs)
        return partial()
    # Anything else is already a concrete value.
    return partial
|
||||||
|
|
||||||
|
|
||||||
|
def to_railroad(
    element: pyparsing.ParserElement,
    diagram_kwargs: Optional[dict] = None,
    vertical: int = 3,
    show_results_names: bool = False,
) -> List[NamedDiagram]:
    """
    Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
    creation if you want to access the Railroad tree before it is converted to HTML
    :param element: base element of the parser being diagrammed
    :param diagram_kwargs: kwargs to pass to the Diagram() constructor
    :param vertical: (optional) - int - limit at which number of alternatives should be
    shown vertically instead of horizontally
    :param show_results_names - bool to indicate whether results name annotations should be
    included in the diagram
    """
    # Convert the whole tree underneath the root
    lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
    _to_diagram_element(
        element,
        lookup=lookup,
        parent=None,
        vertical=vertical,
        show_results_names=show_results_names,
    )

    root_id = id(element)
    # Convert the root if it hasn't been already
    if root_id in lookup:
        if not element.customName:
            # unnamed root: blank the name so the diagram isn't mislabeled
            lookup[root_id].name = ""
        lookup[root_id].mark_for_extraction(root_id, lookup, force=True)

    # Now that we're finished, we can convert from intermediate structures into Railroad elements
    diags = list(lookup.diagrams.values())
    if len(diags) > 1:
        # collapse out duplicate diags with the same name
        seen = set()
        deduped_diags = []
        for d in diags:
            # don't extract SkipTo elements, they are uninformative as subdiagrams
            if d.name == "...":
                continue
            if d.name is not None and d.name not in seen:
                seen.add(d.name)
                deduped_diags.append(d)
        resolved = [resolve_partial(partial) for partial in deduped_diags]
    else:
        # special case - if just one diagram, always display it, even if
        # it has no name
        resolved = [resolve_partial(partial) for partial in diags]
    # stable presentation order: sort by the index assigned during conversion
    return sorted(resolved, key=lambda diag: diag.index)
|
||||||
|
|
||||||
|
|
||||||
|
def _should_vertical(
    specification: int, exprs: Iterable[pyparsing.ParserElement]
) -> bool:
    """
    Returns true if we should return a vertical list of elements
    """
    # None means "never go vertical"; otherwise go vertical once the number of
    # visible alternatives reaches the threshold.
    return specification is not None and len(_visible_exprs(exprs)) >= specification
|
||||||
|
|
||||||
|
|
||||||
|
class ElementState:
    """
    State recorded for an individual pyparsing Element
    """

    # Note: this should be a dataclass, but we have to support Python 3.5
    def __init__(
        self,
        element: pyparsing.ParserElement,
        converted: EditablePartial,
        parent: EditablePartial,
        number: int,
        name: str = None,
        parent_index: Optional[int] = None,
    ):
        #: The pyparsing element that this represents
        self.element: pyparsing.ParserElement = element
        #: The name of the element
        self.name: str = name
        #: The output Railroad element in an unconverted state
        self.converted: EditablePartial = converted
        #: The parent Railroad element, which we store so that we can extract this if it's duplicated
        self.parent: EditablePartial = parent
        #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
        self.number: int = number
        #: The index of this inside its parent
        self.parent_index: Optional[int] = parent_index
        #: If true, we should extract this out into a subdiagram
        self.extract: bool = False
        #: If true, all of this element's children have been filled out
        self.complete: bool = False

    def mark_for_extraction(
        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
    ):
        """
        Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
        :param el_id: id of the element
        :param state: element/diagram state tracker
        :param name: name to use for this element's text
        :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
        root element when we know we're finished
        """
        self.extract = True

        # Set the name
        if not self.name:
            if name:
                # Allow forcing a custom name
                self.name = name
            elif self.element.customName:
                self.name = self.element.customName
            else:
                # anonymous element: extracted diagram gets an empty title
                self.name = ""

        # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children
        # to be added
        # Also, if this is just a string literal etc, don't bother extracting it
        if force or (self.complete and _worth_extracting(self.element)):
            state.extract_into_diagram(el_id)
|
||||||
|
|
||||||
|
|
||||||
|
class ConverterState:
    """
    Stores some state that persists between recursions into the element tree
    """

    def __init__(self, diagram_kwargs: Optional[dict] = None):
        #: A dictionary mapping ParserElements to state relating to them
        self._element_diagram_states: Dict[int, ElementState] = {}
        #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
        #: The index of the next unnamed element
        self.unnamed_index: int = 1
        #: The index of the next element. This is used for sorting
        self.index: int = 0
        #: Shared kwargs that are used to customize the construction of diagrams
        self.diagram_kwargs: dict = diagram_kwargs or {}
        self.extracted_diagram_names: Set[str] = set()

    # dict-like access delegates to the per-element state table, keyed by id()
    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def generate_unnamed(self) -> int:
        """
        Generate a number used in the name of an otherwise unnamed diagram
        """
        self.unnamed_index += 1
        return self.unnamed_index

    def generate_index(self) -> int:
        """
        Generate a number used to index a diagram
        """
        self.index += 1
        return self.index

    def extract_into_diagram(self, el_id: int):
        """
        Used when we encounter the same token twice in the same tree. When this
        happens, we replace all instances of that token with a terminal, and
        create a new subdiagram for the token
        """
        position = self[el_id]

        # Replace the original definition of this element with a regular block
        if position.parent:
            ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name)
            if "item" in position.parent.kwargs:
                position.parent.kwargs["item"] = ret
            elif "items" in position.parent.kwargs:
                position.parent.kwargs["items"][position.parent_index] = ret

        # If the element we're extracting is a group, skip to its content but keep the title
        if position.converted.func == railroad.Group:
            content = position.converted.kwargs["item"]
        else:
            content = position.converted

        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=position.name,
            diagram=EditablePartial.from_call(
                railroad.Diagram, content, **self.diagram_kwargs
            ),
            index=position.number,
        )

        # the element now lives in self.diagrams; drop its in-progress state
        del self[el_id]
|
||||||
|
|
||||||
|
|
||||||
|
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
|
||||||
|
"""
|
||||||
|
Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
|
||||||
|
themselves have children, then its complex enough to extract
|
||||||
|
"""
|
||||||
|
children = element.recurse()
|
||||||
|
return any(child.recurse() for child in children)
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_diagram_item_enhancements(fn):
    """
    decorator to ensure enhancements to a diagram item (such as results name annotations)
    get applied on return from _to_diagram_element (we do this since there are several
    returns in _to_diagram_element)
    """

    def _inner(
        element: pyparsing.ParserElement,
        parent: Optional[EditablePartial],
        lookup: ConverterState = None,
        vertical: int = None,
        index: int = 0,
        name_hint: str = None,
        show_results_names: bool = False,
    ) -> Optional[EditablePartial]:

        # delegate to the wrapped converter; keep the argument order in sync
        # with _to_diagram_element's signature
        ret = fn(
            element,
            parent,
            lookup,
            vertical,
            index,
            name_hint,
            show_results_names,
        )

        # apply annotation for results name, if present
        if show_results_names and ret is not None:
            element_results_name = element.resultsName
            if element_results_name:
                # add "*" to indicate if this is a "list all results" name
                element_results_name += "" if element.modalResults else "*"
                # wrap the converted item in a labeled Group
                ret = EditablePartial.from_call(
                    railroad.Group, item=ret, label=element_results_name
                )

        return ret

    return _inner
|
||||||
|
|
||||||
|
|
||||||
|
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Filter out elements that contribute nothing visible to a diagram.
    """
    # These element types are pure plumbing and are never drawn, unless the
    # user has attached a custom or results name to them.
    hidden_types = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )
    visible = []
    for expr in exprs:
        if expr.customName or expr.resultsName or isinstance(expr, hidden_types):
            continue
        visible.append(expr)
    return visible
|
||||||
|
|
||||||
|
|
||||||
|
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: Optional[EditablePartial],
    lookup: ConverterState = None,
    vertical: int = None,
    index: int = 0,
    name_hint: str = None,
    show_results_names: bool = False,
) -> Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element
    :param lookup: The shared converter state that keeps track of useful things
    :param index: The index of this element within the parent
    :param parent: The parent of this element in the output tree
    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
    it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
    do so
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names
    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
    """
    exprs = element.recurse()
    name = name_hint or element.customName or element.__class__.__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                pyparsing.TokenConverter,
                # pyparsing.Forward,
                pyparsing.Located,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                # recurse directly into the wrapped expression
                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                )

    # If the element isn't worth extracting, we always treat it as the first time we say it
    if _worth_extracting(element):
        if el_id in lookup:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created
    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len(set((e.name, e.resultsName) for e in exprs)) == 1:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        ret = EditablePartial.from_call(railroad.OneOrMore, item="")
    elif isinstance(element, pyparsing.ZeroOrMore):
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Group):
        ret = EditablePartial.from_call(
            railroad.Group, item=None, label=element_results_name
        )
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif len(exprs) > 1:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    else:
        # leaf element: render its default name as a terminal
        terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
        ret = terminal

    if ret is None:
        return

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    if element.customName:
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all this items children are none, skip this item
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    # a completed duplicate can now be extracted; replace our return value
    # with a reference to the new sub-diagram
    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )

    return ret
|
|
@ -0,0 +1,26 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
{% if not head %}
|
||||||
|
<style type="text/css">
|
||||||
|
.railroad-heading {
|
||||||
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
{% else %}
|
||||||
|
{{ head | safe }}
|
||||||
|
{% endif %}
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
{{ body | safe }}
|
||||||
|
{% for diagram in diagrams %}
|
||||||
|
<div class="railroad-group">
|
||||||
|
<h1 class="railroad-heading">{{ diagram.title }}</h1>
|
||||||
|
<div class="railroad-description">{{ diagram.text }}</div>
|
||||||
|
<div class="railroad-svg">
|
||||||
|
{{ diagram.svg }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -0,0 +1,267 @@
|
||||||
|
# exceptions.py
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .util import col, line, lineno, _collapse_string_to_ranges
|
||||||
|
from .unicode import pyparsing_unicode as ppu
|
||||||
|
|
||||||
|
|
||||||
|
# union of unicode ranges used when extracting the "word" at an error
# location for exception messages
class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic):
    pass
|
||||||
|
|
||||||
|
|
||||||
|
# character-class string covering all alphanumerics of the unicode sets above
_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums)
# matches up to 16 word characters at the error location, or any single char
_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.")
|
||||||
|
|
||||||
|
|
||||||
|
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self,
        pstr: str,
        loc: int = 0,
        msg: Optional[str] = None,
        elem=None,
    ):
        # loc: 0-based character offset into pstr where the failure occurred
        self.loc = loc
        if msg is None:
            # single-argument form: the argument is the message, not the input
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        # elem is the ParserElement that raised — kept under both the new
        # snake_case name and the legacy camelCase alias
        self.parser_element = self.parserElement = elem
        self.args = (pstr, loc, msg)

    @staticmethod
    def explain_exception(exc, depth=16):
        """
        Method to take an exception and translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

        - exc - exception raised during parsing (need not be a ParseException, in support
          of Python exceptions that might be raised in a parse action)
        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.
        """
        # imported here to avoid a circular import at module load time
        import inspect
        from .core import ParserElement

        if depth is None:
            depth = sys.getrecursionlimit()
        ret = []
        if isinstance(exc, ParseBaseException):
            # show the failing line with a caret under the error column
            ret.append(exc.line)
            ret.append(" " * (exc.column - 1) + "^")
            ret.append("{}: {}".format(type(exc).__name__, exc))

        if depth > 0:
            callers = inspect.getinnerframes(exc.__traceback__, context=depth)
            seen = set()
            for i, ff in enumerate(callers[-depth:]):
                frm = ff[0]

                f_self = frm.f_locals.get("self", None)
                if isinstance(f_self, ParserElement):
                    # only report the parse entry points, and each element once
                    if frm.f_code.co_name not in ("parseImpl", "_parseNoCache"):
                        continue
                    if id(f_self) in seen:
                        continue
                    seen.add(id(f_self))

                    self_type = type(f_self)
                    ret.append(
                        "{}.{} - {}".format(
                            self_type.__module__, self_type.__name__, f_self
                        )
                    )

                elif f_self is not None:
                    # a method on some non-ParserElement object
                    self_type = type(f_self)
                    ret.append("{}.{}".format(self_type.__module__, self_type.__name__))

                else:
                    # plain function frame; skip decorator wrappers and module level
                    code = frm.f_code
                    if code.co_name in ("wrapper", "<module>"):
                        continue

                    ret.append("{}".format(code.co_name))

                depth -= 1
                if not depth:
                    break

        return "\n".join(ret)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    @property
    def line(self) -> str:
        """
        Return the line of text where the exception occurred.
        """
        return line(self.loc, self.pstr)

    @property
    def lineno(self) -> int:
        """
        Return the 1-based line number of text where the exception occurred.
        """
        return lineno(self.loc, self.pstr)

    @property
    def col(self) -> int:
        """
        Return the 1-based column on the line of text where the exception occurred.
        """
        return col(self.loc, self.pstr)

    @property
    def column(self) -> int:
        """
        Return the 1-based column on the line of text where the exception occurred.
        """
        return col(self.loc, self.pstr)

    def __str__(self) -> str:
        if self.pstr:
            if self.loc >= len(self.pstr):
                foundstr = ", found end of text"
            else:
                # pull out next word at error location
                found_match = _exception_word_extractor.match(self.pstr, self.loc)
                if found_match is not None:
                    found = found_match.group(0)
                else:
                    found = self.pstr[self.loc : self.loc + 1]
                foundstr = (", found %r" % found).replace(r"\\", "\\")
        else:
            foundstr = ""
        return "{}{} (at char {}), (line:{}, col:{})".format(
            self.msg, foundstr, self.loc, self.lineno, self.column
        )

    def __repr__(self) -> str:
        return str(self)

    def mark_input_line(
        self, marker_string: Optional[str] = None, *, markerString=">!<"
    ) -> str:
        """
        Extracts the exception line from the input string, and marks
        the location of the exception with a special symbol.
        """
        # marker_string (snake_case) overrides the legacy markerString kwarg
        markerString = marker_string if marker_string is not None else markerString
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join(
                (line_str[:line_column], markerString, line_str[line_column:])
            )
        return line_str.strip()

    def explain(self, depth=16) -> str:
        """
        Method to translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Example::

            expr = pp.Word(pp.nums) * 3
            try:
                expr.parse_string("123 456 A789")
            except pp.ParseException as pe:
                print(pe.explain(depth=0))

        prints::

            123 456 A789
                    ^
            ParseException: Expected W:(0-9), found 'A'  (at char 8), (line:1, col:9)

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `set_name` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        Note: pyparsing's default truncation of exception tracebacks may also truncate the
        stack of expressions that are displayed in the ``explain`` output. To get the full listing
        of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True``
        """
        return self.explain_exception(self, depth)

    # legacy camelCase alias
    markInputline = mark_input_line
|
||||||
|
|
||||||
|
|
||||||
|
class ParseException(ParseBaseException):
    """
    Exception thrown when a parse expression doesn't match the input string

    Example::

        try:
            Word(nums).set_name("integer").parse_string("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.column))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    """
|
||||||
|
|
||||||
|
|
||||||
|
class ParseFatalException(ParseBaseException):
    """
    User-throwable exception thrown when inconsistent parse content
    is found; stops all parsing immediately
    """
|
||||||
|
|
||||||
|
|
||||||
|
class ParseSyntaxException(ParseFatalException):
    """
    Just like :class:`ParseFatalException`, but thrown internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
    that parsing is to stop immediately because an unbacktrackable
    syntax error has been found.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class RecursiveGrammarException(Exception):
    """
    Exception thrown by :class:`ParserElement.validate` if the
    grammar could be left-recursive; parser may need to enable
    left recursion using :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
    """

    def __init__(self, parseElementList):
        # keep the chain of parser elements that formed the recursive cycle,
        # so callers can inspect which expressions are involved
        self.parseElementTrace = parseElementList

    def __str__(self) -> str:
        return f"RecursiveGrammarException: {self.parseElementTrace}"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,760 @@
|
||||||
|
# results.py
|
||||||
|
from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
|
||||||
|
import pprint
|
||||||
|
from weakref import ref as wkref
|
||||||
|
from typing import Tuple, Any
|
||||||
|
|
||||||
|
# Types accepted wherever a "string" value is expected by the results code.
str_type: Tuple[type, ...] = (str, bytes)
# Concrete generator type (there is no public name for it); used for
# isinstance checks when deciding how to build the internal token list.
_generator_type = type((_ for _ in ()))
|
||||||
|
|
||||||
|
|
||||||
|
class _ParseResultsWithOffset:
|
||||||
|
__slots__ = ["tup"]
|
||||||
|
|
||||||
|
def __init__(self, p1, p2):
|
||||||
|
self.tup = (p1, p2)
|
||||||
|
|
||||||
|
def __getitem__(self, i):
|
||||||
|
return self.tup[i]
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
return self.tup
|
||||||
|
|
||||||
|
def __setstate__(self, *args):
|
||||||
|
self.tup = args[0]
|
||||||
|
|
||||||
|
|
||||||
|
class ParseResults:
    """Structured parse results, to provide multiple means of access to
    the parsed data:

    - as a list (``len(results)``)
    - by list index (``results[0], results[1]``, etc.)
    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))
        # equivalent form:
        # date_str = (integer("year") + '/'
        #             + integer("month") + '/'
        #             + integer("day"))

        # parse_string returns a ParseResults object
        result = date_str.parse_string("1999/12/31")

        def test(s, fn=repr):
            print("{} -> {}".format(s, fn(eval(s))))
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)

    prints::

        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: 31
        - month: 12
        - year: 1999
    """

    # values considered "empty" by __init__ when deciding whether to store
    # a named result
    _null_values: Tuple[Any, ...] = (None, [], "", ())

    __slots__ = [
        "_name",       # results name assigned to this ParseResults, if any
        "_parent",     # weakref to enclosing ParseResults (for get_name)
        "_all_names",  # names assigned with list_all_matches (non-modal)
        "_modal",      # whether later assignments replace earlier ones
        "_toklist",    # list of matched tokens
        "_tokdict",    # name -> list of _ParseResultsWithOffset
        "__weakref__",
    ]

    class List(list):
        """
        Simple wrapper class to distinguish parsed list results that should be preserved
        as actual Python lists, instead of being converted to :class:`ParseResults`:

            LBRACK, RBRACK = map(pp.Suppress, "[]")
            element = pp.Forward()
            item = ppc.integer
            element_list = LBRACK + pp.delimited_list(element) + RBRACK

            # add parse actions to convert from ParseResults to actual Python collection types
            def as_python_list(t):
                return pp.ParseResults.List(t.as_list())
            element_list.add_parse_action(as_python_list)

            element <<= item | element_list

            element.run_tests('''
                100
                [2,3,4]
                [[2, 1],3,4]
                [(2, 1),3,4]
                (2,3,4)
                ''', post_parse=lambda s, r: (r[0], type(r[0])))

        prints:

            100
            (100, <class 'int'>)

            [2,3,4]
            ([2, 3, 4], <class 'list'>)

            [[2, 1],3,4]
            ([[2, 1], 3, 4], <class 'list'>)

        (Used internally by :class:`Group` when `aslist=True`.)
        """

        def __new__(cls, contained=None):
            if contained is None:
                contained = []

            # only actual lists may be wrapped; reject tuples, generators, etc.
            if not isinstance(contained, list):
                raise TypeError(
                    "{} may only be constructed with a list,"
                    " not {}".format(cls.__name__, type(contained).__name__)
                )

            return list.__new__(cls)

    def __new__(cls, toklist=None, name=None, **kwargs):
        # wrapping an existing ParseResults returns it unchanged (no copy)
        if isinstance(toklist, ParseResults):
            return toklist
        self = object.__new__(cls)
        self._name = None
        self._parent = None
        self._all_names = set()

        if toklist is None:
            self._toklist = []
        elif isinstance(toklist, (list, _generator_type)):
            # a ParseResults.List is preserved as a single nested element;
            # any other list/generator is flattened into the token list
            self._toklist = (
                [toklist[:]]
                if isinstance(toklist, ParseResults.List)
                else list(toklist)
            )
        else:
            # scalar token becomes a one-element list
            self._toklist = [toklist]
        self._tokdict = dict()
        return self

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ):
        # NOTE: isinstance is passed as a default arg to make it a fast
        # local lookup in this very hot constructor
        self._modal = modal
        if name is not None and name != "":
            if isinstance(name, int):
                # integer names are normalized to strings
                name = str(name)
            if not modal:
                self._all_names = {name}
            self._name = name
            if toklist not in self._null_values:
                if isinstance(toklist, (str_type, type)):
                    toklist = [toklist]
                if asList:
                    # store the named value as a nested ParseResults at offset 0
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(
                            ParseResults(toklist._toklist), 0
                        )
                    else:
                        self[name] = _ParseResultsWithOffset(
                            ParseResults(toklist[0]), 0
                        )
                    self[name]._name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        if toklist is not self:
                            self[name] = toklist
                        else:
                            self._name = name

    def __getitem__(self, i):
        if isinstance(i, (int, slice)):
            return self._toklist[i]
        else:
            if i not in self._all_names:
                # modal name: return the most recent value
                return self._tokdict[i][-1][0]
            else:
                # list-all-matches name: return every value
                return ParseResults([v[0] for v in self._tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        if isinstance(v, _ParseResultsWithOffset):
            self._tokdict[k] = self._tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, (int, slice)):
            self._toklist[k] = v
            sub = v
        else:
            self._tokdict[k] = self._tokdict.get(k, list()) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v
        if isinstance(sub, ParseResults):
            # weakref so parent/child cycles don't keep results alive
            sub._parent = wkref(self)

    def __delitem__(self, i):
        if isinstance(i, (int, slice)):
            mylen = len(self._toklist)
            del self._toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i + 1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name, occurrences in self._tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(
                            value, position - (position > j)
                        )
        else:
            del self._tokdict[i]

    def __contains__(self, k) -> bool:
        # membership tests against results names, not token values
        return k in self._tokdict

    def __len__(self) -> int:
        return len(self._toklist)

    def __bool__(self) -> bool:
        # truthy if there are any tokens OR any named results
        return not not (self._toklist or self._tokdict)

    def __iter__(self) -> Iterator:
        return iter(self._toklist)

    def __reversed__(self) -> Iterator:
        return iter(self._toklist[::-1])

    def keys(self):
        return iter(self._tokdict)

    def values(self):
        return (self[k] for k in self.keys())

    def items(self):
        return ((k, self[k]) for k in self.keys())

    def haskeys(self) -> bool:
        """
        Since ``keys()`` returns an iterator, this method is helpful in bypassing
        code that looks for the existence of any defined results names."""
        return bool(self._tokdict)

    def pop(self, *args, **kwargs):
        """
        Removes and returns item at specified index (default= ``last``).
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
        passed no argument or an integer argument, it will use ``list``
        semantics and pop tokens from the list of parsed tokens. If passed
        a non-integer argument (most likely a string), it will use ``dict``
        semantics and pop the corresponding value from any defined results
        names. A second default return value argument is supported, just as in
        ``dict.pop()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            def remove_first(tokens):
                tokens.pop(0)
            numlist.add_parse_action(remove_first)
            print(numlist.parse_string("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parse_string("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.add_parse_action(remove_LABEL)
            print(patt.parse_string("AAB 123 321").dump())

        prints::

            ['AAB', '123', '321']
            - LABEL: AAB

            ['AAB', '123', '321']
        """
        if not args:
            # no index given: pop the last token, like list.pop()
            args = [-1]
        for k, v in kwargs.items():
            if k == "default":
                args = (args[0], v)
            else:
                raise TypeError(
                    "pop() got an unexpected keyword argument {!r}".format(k)
                )
        if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            # dict semantics with a default for a missing name
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, default_value=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given ``default_value`` or ``None`` if no
        ``default_value`` is specified.

        Similar to ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return default_value

    def insert(self, index, ins_string):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            numlist.add_parse_action(insert_locn)
            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
        """
        self._toklist.insert(index, ins_string)
        # fixup indices in token dictionary
        for name, occurrences in self._tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(
                    value, position + (position > index)
                )

    def append(self, item):
        """
        Add single element to end of ``ParseResults`` list of elements.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            numlist.add_parse_action(append_sum)
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self._toklist.append(item)

    def extend(self, itemseq):
        """
        Add sequence of elements to end of ``ParseResults`` list of elements.

        Example::

            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            patt.add_parse_action(make_palindrome)
            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            # merge named results too, not just the token list
            self.__iadd__(itemseq)
        else:
            self._toklist.extend(itemseq)

    def clear(self):
        """
        Clear all elements and results names.
        """
        del self._toklist[:]
        self._tokdict.clear()

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            if name.startswith("__"):
                # don't mask dunder lookups (pickle, copy, etc.)
                raise AttributeError(name)
            # undefined results names read as empty string
            return ""

    def __add__(self, other) -> "ParseResults":
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other) -> "ParseResults":
        if other._tokdict:
            offset = len(self._toklist)
            # negative offsets mark "end of list"; rebase others past our tokens
            addoffset = lambda a: offset if a < 0 else a + offset
            otheritems = other._tokdict.items()
            otherdictitems = [
                (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                for k, vlist in otheritems
                for v in vlist
            ]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0]._parent = wkref(self)

        self._toklist += other._toklist
        self._all_names |= other._all_names
        return self

    def __radd__(self, other) -> "ParseResults":
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__(self) -> str:
        return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict())

    def __str__(self) -> str:
        return (
            "["
            + ", ".join(
                [
                    str(i) if isinstance(i, ParseResults) else repr(i)
                    for i in self._toklist
                ]
            )
            + "]"
        )

    def _asStringList(self, sep=""):
        # flatten nested ParseResults into a flat list of strings
        out = []
        for item in self._toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def as_list(self) -> list:
        """
        Returns the parse results as a nested list of matching tokens, all converted to strings.

        Example::

            patt = OneOrMore(Word(alphas))
            result = patt.parse_string("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use as_list() to create an actual list
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]

    def as_dict(self) -> dict:
        """
        Returns the named parse results as a nested dictionary.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.as_dict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """

        def to_item(obj):
            # named sub-results become dicts; unnamed ones become plain lists
            if isinstance(obj, ParseResults):
                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
            else:
                return obj

        return dict((k, to_item(v)) for k, v in self.items())

    def copy(self) -> "ParseResults":
        """
        Returns a new copy of a :class:`ParseResults` object.
        """
        # shallow copy: token list and dict are new containers,
        # but the contained values are shared
        ret = ParseResults(self._toklist)
        ret._tokdict = self._tokdict.copy()
        ret._parent = self._parent
        ret._all_names |= self._all_names
        ret._name = self._name
        return ret

    def get_name(self):
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parse_string("22 111-22-3333 #221B")
            for item in result:
                print(item.get_name(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self._name:
            return self._name
        elif self._parent:
            # _parent is a weakref; dereference and search its name dict for us
            par = self._parent()

            def find_in_parent(sub):
                return next(
                    (
                        k
                        for k, vlist in par._tokdict.items()
                        for v, loc in vlist
                        if sub is v
                    ),
                    None,
                )

            return find_in_parent(self) if par else None
        elif (
            len(self) == 1
            and len(self._tokdict) == 1
            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
        ):
            # single token with a single name covering it: that name wins
            return next(iter(self._tokdict.keys()))
        else:
            return None

    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(result.dump())

        prints::

            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        out = []
        NL = "\n"
        out.append(indent + str(self.as_list()) if include_list else "")

        if full:
            if self.haskeys():
                items = sorted((str(k), v) for k, v in self.items())
                for k, v in items:
                    if out:
                        out.append(NL)
                    out.append("{}{}- {}: ".format(indent, ("  " * _depth), k))
                    if isinstance(v, ParseResults):
                        if v:
                            # recurse into non-empty nested results
                            out.append(
                                v.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                )
                            )
                        else:
                            out.append(str(v))
                    else:
                        out.append(repr(v))
            if any(isinstance(vv, ParseResults) for vv in self):
                v = self
                for i, vv in enumerate(v):
                    if isinstance(vv, ParseResults):
                        out.append(
                            "\n{}{}[{}]:\n{}{}{}".format(
                                indent,
                                ("  " * (_depth)),
                                i,
                                indent,
                                ("  " * (_depth + 1)),
                                vv.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                ),
                            )
                        )
                    else:
                        out.append(
                            "\n%s%s[%d]:\n%s%s%s"
                            % (
                                indent,
                                ("  " * (_depth)),
                                i,
                                indent,
                                ("  " * (_depth + 1)),
                                str(vv),
                            )
                        )

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
        Accepts additional positional or keyword args as defined for
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

        Example::

            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimited_list(term)))
            result = func.parse_string("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)

        prints::

            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.as_list(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # weakref parent cannot be pickled: dereference it (or None)
        return (
            self._toklist,
            (
                self._tokdict.copy(),
                self._parent is not None and self._parent() or None,
                self._all_names,
                self._name,
            ),
        )

    def __setstate__(self, state):
        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
        self._all_names = set(inAccumNames)
        if par is not None:
            # re-wrap the unpickled parent in a weakref
            self._parent = wkref(par)
        else:
            self._parent = None

    def __getnewargs__(self):
        return self._toklist, self._name

    def __dir__(self):
        # expose results names for tab-completion alongside normal attributes
        return dir(type(self)) + list(self.keys())

    @classmethod
    def from_dict(cls, other, name=None) -> "ParseResults":
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
        given, a nested ``ParseResults`` will be returned.
        """

        def is_iterable(obj):
            try:
                iter(obj)
            except Exception:
                return False
            else:
                # strings iterate but should be treated as scalars here
                return not isinstance(obj, str_type)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                ret += cls.from_dict(v, name=k)
            else:
                ret += cls([v], name=k, asList=is_iterable(v))
        if name is not None:
            ret = cls([ret], name=name)
        return ret

    # legacy camelCase aliases
    asList = as_list
    asDict = as_dict
    getName = get_name
|
||||||
|
|
||||||
|
|
||||||
|
# ParseResults duck-types as both a mapping and a sequence; register it as a
# virtual subclass of the corresponding container ABCs so isinstance() checks
# against MutableMapping/MutableSequence succeed without real inheritance.
MutableSequence.register(ParseResults)
MutableMapping.register(ParseResults)
|
|
@ -0,0 +1,331 @@
|
||||||
|
# testing.py
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .core import (
|
||||||
|
ParserElement,
|
||||||
|
ParseException,
|
||||||
|
Keyword,
|
||||||
|
__diag__,
|
||||||
|
__compat__,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class pyparsing_test:
    """
    namespace class for classes useful in writing unit tests
    """

    class reset_pyparsing_context:
        """
        Context manager to be used when writing unit tests that modify pyparsing config values:
        - packrat parsing
        - bounded recursion parsing
        - default whitespace characters.
        - default keyword characters
        - literal string auto-conversion class
        - __diag__ settings

        Example::

            with reset_pyparsing_context():
                # test that literals used to construct a grammar are automatically suppressed
                ParserElement.inlineLiteralsUsing(Suppress)

                term = Word(alphas) | Word(nums)
                group = Group('(' + term[...] + ')')

                # assert that the '()' characters are not included in the parsed tokens
                self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

            # after exiting context manager, literals are converted to Literal expressions again
        """

        def __init__(self):
            # mapping of config-item name -> saved value, filled by save()
            self._save_context = {}

        def save(self):
            """Snapshot all global pyparsing configuration; returns self."""
            self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
            self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS

            self._save_context[
                "literal_string_class"
            ] = ParserElement._literalStringClass

            self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace

            self._save_context["packrat_enabled"] = ParserElement._packratEnabled
            if ParserElement._packratEnabled:
                self._save_context[
                    "packrat_cache_size"
                ] = ParserElement.packrat_cache.size
            else:
                self._save_context["packrat_cache_size"] = None
            self._save_context["packrat_parse"] = ParserElement._parse
            self._save_context[
                "recursion_enabled"
            ] = ParserElement._left_recursion_enabled

            self._save_context["__diag__"] = {
                name: getattr(__diag__, name) for name in __diag__._all_names
            }

            self._save_context["__compat__"] = {
                "collect_all_And_tokens": __compat__.collect_all_And_tokens
            }

            return self

        def restore(self):
            """Restore the configuration captured by save(); returns self."""
            # reset pyparsing global state
            if (
                ParserElement.DEFAULT_WHITE_CHARS
                != self._save_context["default_whitespace"]
            ):
                ParserElement.set_default_whitespace_chars(
                    self._save_context["default_whitespace"]
                )

            ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"]

            Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
            ParserElement.inlineLiteralsUsing(
                self._save_context["literal_string_class"]
            )

            for name, value in self._save_context["__diag__"].items():
                (__diag__.enable if value else __diag__.disable)(name)

            ParserElement._packratEnabled = False
            if self._save_context["packrat_enabled"]:
                ParserElement.enable_packrat(self._save_context["packrat_cache_size"])
            else:
                ParserElement._parse = self._save_context["packrat_parse"]
            ParserElement._left_recursion_enabled = self._save_context[
                "recursion_enabled"
            ]

            # BUGFIX: save() stores a dict {"collect_all_And_tokens": value};
            # previously the whole dict was assigned to the boolean flag,
            # leaving it permanently truthy. Restore the saved value itself.
            __compat__.collect_all_And_tokens = self._save_context["__compat__"][
                "collect_all_And_tokens"
            ]

            return self

        def copy(self):
            """Return a new context object holding a copy of the saved state."""
            ret = type(self)()
            ret._save_context.update(self._save_context)
            return ret

        def __enter__(self):
            return self.save()

        def __exit__(self, *args):
            self.restore()

    class TestParseResultsAsserts:
        """
        A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
        """

        def assertParseResultsEquals(
            self, result, expected_list=None, expected_dict=None, msg=None
        ):
            """
            Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``,
            and compare any defined results names with an optional ``expected_dict``.
            """
            if expected_list is not None:
                self.assertEqual(expected_list, result.as_list(), msg=msg)
            if expected_dict is not None:
                self.assertEqual(expected_dict, result.as_dict(), msg=msg)

        def assertParseAndCheckList(
            self, expr, test_string, expected_list, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ``ParseResults.asList()`` is equal to the ``expected_list``.
            """
            result = expr.parse_string(test_string, parse_all=True)
            if verbose:
                print(result.dump())
            else:
                print(result.as_list())
            self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

        def assertParseAndCheckDict(
            self, expr, test_string, expected_dict, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ``ParseResults.asDict()`` is equal to the ``expected_dict``.
            """
            # use snake_case parse_all for consistency with assertParseAndCheckList
            # (previously passed camelCase parseAll=True)
            result = expr.parse_string(test_string, parse_all=True)
            if verbose:
                print(result.dump())
            else:
                print(result.as_list())
            self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

        def assertRunTestResults(
            self, run_tests_report, expected_parse_results=None, msg=None
        ):
            """
            Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of
            list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped
            with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``.
            Finally, asserts that the overall ``runTests()`` success value is ``True``.

            :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
            :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
            """
            run_test_success, run_test_results = run_tests_report

            if expected_parse_results is not None:
                merged = [
                    (*rpt, expected)
                    for rpt, expected in zip(run_test_results, expected_parse_results)
                ]
                for test_string, result, expected in merged:
                    # expected should be a tuple containing a list and/or a dict or an exception,
                    # and optional failure message string
                    # an empty tuple will skip any result validation
                    fail_msg = next(
                        (exp for exp in expected if isinstance(exp, str)), None
                    )
                    expected_exception = next(
                        (
                            exp
                            for exp in expected
                            if isinstance(exp, type) and issubclass(exp, Exception)
                        ),
                        None,
                    )
                    if expected_exception is not None:
                        with self.assertRaises(
                            expected_exception=expected_exception, msg=fail_msg or msg
                        ):
                            if isinstance(result, Exception):
                                raise result
                    else:
                        expected_list = next(
                            (exp for exp in expected if isinstance(exp, list)), None
                        )
                        expected_dict = next(
                            (exp for exp in expected if isinstance(exp, dict)), None
                        )
                        if (expected_list, expected_dict) != (None, None):
                            self.assertParseResultsEquals(
                                result,
                                expected_list=expected_list,
                                expected_dict=expected_dict,
                                msg=fail_msg or msg,
                            )
                        else:
                            # warning here maybe?
                            print("no validation for {!r}".format(test_string))

            # do this last, in case some specific test results can be reported instead
            self.assertTrue(
                run_test_success, msg=msg if msg is not None else "failed runTests"
            )

        @contextmanager
        def assertRaisesParseException(self, exc_type=ParseException, msg=None):
            """Context manager asserting that the body raises a ParseException (or subclass)."""
            with self.assertRaises(exc_type, msg=msg):
                yield

    @staticmethod
    def with_line_numbers(
        s: str,
        start_line: Optional[int] = None,
        end_line: Optional[int] = None,
        expand_tabs: bool = True,
        eol_mark: str = "|",
        mark_spaces: Optional[str] = None,
        mark_control: Optional[str] = None,
    ) -> str:
        """
        Helpful method for debugging a parser - prints a string with line and column numbers.
        (Line and column numbers are 1-based.)

        :param s: tuple(bool, str - string to be printed with line and column numbers
        :param start_line: int - (optional) starting line number in s to print (default=1)
        :param end_line: int - (optional) ending line number in s to print (default=len(s))
        :param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default
        :param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|")
        :param mark_spaces: str - (optional) special character to display in place of spaces
        :param mark_control: str - (optional) convert non-printing control characters to a placeholding
                                 character; valid values:
                                 - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊"
                                 - any single character string - replace control characters with given string
                                 - None (default) - string is displayed as-is

        :return: str - input string with leading line numbers and column number headers
        """
        if expand_tabs:
            s = s.expandtabs()
        if mark_control is not None:
            if mark_control == "unicode":
                # map C0 controls to the Unicode Control Pictures block
                tbl = str.maketrans(
                    {c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))}
                    | {127: 0x2421}
                )
                eol_mark = ""
            else:
                tbl = str.maketrans(
                    {c: mark_control for c in list(range(0, 32)) + [127]}
                )
            s = s.translate(tbl)
        if mark_spaces is not None and mark_spaces != " ":
            if mark_spaces == "unicode":
                tbl = str.maketrans({9: 0x2409, 32: 0x2423})
                s = s.translate(tbl)
            else:
                s = s.replace(" ", mark_spaces)
        if start_line is None:
            start_line = 1
        if end_line is None:
            end_line = len(s)
        end_line = min(end_line, len(s))
        start_line = min(max(1, start_line), end_line)

        if mark_control != "unicode":
            s_lines = s.splitlines()[start_line - 1 : end_line]
        else:
            # newlines were translated to "␊" above, so split on that symbol
            s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]]
        if not s_lines:
            return ""

        lineno_width = len(str(end_line))
        max_line_len = max(len(line) for line in s_lines)
        lead = " " * (lineno_width + 1)
        # hundreds-digit header row, only needed for lines 100+ columns wide
        if max_line_len >= 99:
            header0 = (
                lead
                + "".join(
                    "{}{}".format(" " * 99, (i + 1) % 100)
                    for i in range(max(max_line_len // 100, 1))
                )
                + "\n"
            )
        else:
            header0 = ""
        # tens-digit header row (-(-x // 10) is ceiling division)
        header1 = (
            header0
            + lead
            + "".join(
                "         {}".format((i + 1) % 10)
                for i in range(-(-max_line_len // 10))
            )
            + "\n"
        )
        header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
        return (
            header1
            + header2
            + "\n".join(
                "{:{}d}:{}{}".format(i, lineno_width, line, eol_mark)
                for i, line in enumerate(s_lines, start=start_line)
            )
            + "\n"
        )
|
|
@ -0,0 +1,332 @@
|
||||||
|
# unicode.py
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from itertools import filterfalse
|
||||||
|
from typing import List, Tuple, Union
|
||||||
|
|
||||||
|
|
||||||
|
class _lazyclassproperty:
    """Descriptor that computes a class-level property once per class and caches it."""

    def __init__(self, fn):
        self.fn = fn
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    def __get__(self, obj, cls):
        if cls is None:
            cls = type(obj)
        # Give each class its own cache dict: if _intern is missing, or is
        # the very same object as an ancestor's, sharing it would let
        # subclasses see and pollute the parent's cached values.
        if not hasattr(cls, "_intern") or any(
            cls._intern is getattr(ancestor, "_intern", [])
            for ancestor in cls.__mro__[1:]
        ):
            cls._intern = {}
        attr = self.fn.__name__
        if attr not in cls._intern:
            cls._intern[attr] = self.fn(cls)
        return cls._intern[attr]
|
||||||
|
|
||||||
|
|
||||||
|
# A range list is a sequence of inclusive (lo, hi) pairs; a 1-tuple (x,) means (x, x).
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
|
||||||
|
|
||||||
|
|
||||||
|
class unicode_set:
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``. Ranges can be specified using
    2-tuples or a 1-tuple, such as::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
        ]

    Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        # Merge the ranges contributed by every class in the MRO up to (but
        # not including) unicode_set itself, so multiple inheritance unions
        # the character sets of all bases.
        code_points = []
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for rng in getattr(klass, "_ranges", ()):
                # rng[-1] handles both 1-tuples (x,) and 2-tuples (lo, hi)
                code_points.extend(range(rng[0], rng[-1] + 1))
        return [chr(cp) for cp in sorted(set(code_points))]

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        return "".join(c for c in cls._chars_for_ranges if not c.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        return "".join(c for c in cls._chars_for_ranges if c.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        return "".join(c for c in cls._chars_for_ranges if c.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        return cls.alphas + cls.nums

    @_lazyclassproperty
    def identchars(cls):
        "all characters in this range that are valid identifier characters, plus underscore '_'"
        in_range = "".join(filter(str.isidentifier, cls._chars_for_ranges))
        # always include the ASCII/Latin-1 identifier starters
        latin = (
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
        )
        return "".join(sorted(set(in_range + latin + "_")))

    @_lazyclassproperty
    def identbodychars(cls):
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9
        """
        # a char is a valid identifier *body* char if "_" + char is a valid identifier
        body_ok = "".join(
            c for c in cls._chars_for_ranges if ("_" + c).isidentifier()
        )
        return "".join(sorted(set(cls.identchars + "0123456789" + body_ok)))
|
||||||
|
|
||||||
|
|
||||||
|
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.
    """

    # the full printable Unicode range
    _ranges: UnicodeRangeList = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0020, 0x007E), (0x00A0, 0x00FF)]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0100, 0x017F)]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0180, 0x024F)]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        _ranges: UnicodeRangeList = [
            (0x0342, 0x0345), (0x0370, 0x0377), (0x037A, 0x037F),
            (0x0384, 0x038A), (0x038C,), (0x038E, 0x03A1),
            (0x03A3, 0x03E1), (0x03F0, 0x03FF), (0x1D26, 0x1D2A),
            (0x1D5E,), (0x1D60,), (0x1D66, 0x1D6A),
            (0x1F00, 0x1F15), (0x1F18, 0x1F1D), (0x1F20, 0x1F45),
            (0x1F48, 0x1F4D), (0x1F50, 0x1F57), (0x1F59,),
            (0x1F5B,), (0x1F5D,), (0x1F5F, 0x1F7D),
            (0x1F80, 0x1FB4), (0x1FB6, 0x1FC4), (0x1FC6, 0x1FD3),
            (0x1FD6, 0x1FDB), (0x1FDD, 0x1FEF), (0x1FF2, 0x1FF4),
            (0x1FF6, 0x1FFE), (0x2129,), (0x2719, 0x271A),
            (0xAB65,), (0x10140, 0x1018D), (0x101A0,),
            (0x1D200, 0x1D245), (0x1F7A1, 0x1F7A7),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0400, 0x052F), (0x1C80, 0x1C88), (0x1D2B,), (0x1D78,),
            (0x2DE0, 0x2DFF), (0xA640, 0xA672), (0xA674, 0xA69F),
            (0xFE2E, 0xFE2F),
        ]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x2E80, 0x2E99), (0x2E9B, 0x2EF3), (0x31C0, 0x31E3),
            (0x3400, 0x4DB5), (0x4E00, 0x9FEF), (0xA700, 0xA707),
            (0xF900, 0xFA6D), (0xFA70, 0xFAD9), (0x16FE2, 0x16FE3),
            (0x1F210, 0x1F212), (0x1F214, 0x1F23B), (0x1F240, 0x1F248),
            (0x20000, 0x2A6D6), (0x2A700, 0x2B734), (0x2B740, 0x2B81D),
            (0x2B820, 0x2CEA1), (0x2CEB0, 0x2EBE0), (0x2F800, 0x2FA1D),
        ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # populated at module load time from the three sub-ranges below
        _ranges: UnicodeRangeList = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges: UnicodeRangeList = [(0x4E00, 0x9FBF), (0x3000, 0x303F)]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x3041, 0x3096), (0x3099, 0x30A0), (0x30FC,),
                (0xFF70,), (0x1B001,), (0x1B150, 0x1B152), (0x1F200,),
            ]

        class Katakana(unicode_set):
            "Unicode set for Katakana Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x3099, 0x309C), (0x30A0, 0x30FF), (0x31F0, 0x31FF),
                (0x32D0, 0x32FE), (0xFF65, 0xFF9F), (0x1B000,),
                (0x1B164, 0x1B167), (0x1F201, 0x1F202), (0x1F213,),
            ]

    class Hangul(unicode_set):
        "Unicode set for Hangul (Korean) Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x1100, 0x11FF), (0x302E, 0x302F), (0x3131, 0x318E),
            (0x3200, 0x321C), (0x3260, 0x327B), (0x327E,),
            (0xA960, 0xA97C), (0xAC00, 0xD7A3), (0xD7B0, 0xD7C6),
            (0xD7CB, 0xD7FB), (0xFFA0, 0xFFBE), (0xFFC2, 0xFFC7),
            (0xFFCA, 0xFFCF), (0xFFD2, 0xFFD7), (0xFFDA, 0xFFDC),
        ]

    Korean = Hangul

    class CJK(Chinese, Japanese, Hangul):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        pass

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0600, 0x061B), (0x061E, 0x06FF), (0x0700, 0x077F),
        ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0591, 0x05C7), (0x05D0, 0x05EA), (0x05EF, 0x05F4),
            (0xFB1D, 0xFB36), (0xFB38, 0xFB3C), (0xFB3E,),
            (0xFB40, 0xFB41), (0xFB43, 0xFB44), (0xFB46, 0xFB4F),
        ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]
|
||||||
|
|
||||||
|
|
||||||
|
# Japanese is the union of its Kanji, Hiragana, and Katakana sub-ranges;
# assigned here because the nested classes only exist once the class body
# has finished executing.
pyparsing_unicode.Japanese._ranges = (
    pyparsing_unicode.Japanese.Kanji._ranges
    + pyparsing_unicode.Japanese.Hiragana._ranges
    + pyparsing_unicode.Japanese.Katakana._ranges
)

# define ranges in language character sets: native-script aliases for the
# language sets defined above
pyparsing_unicode.العربية = pyparsing_unicode.Arabic
pyparsing_unicode.中文 = pyparsing_unicode.Chinese
pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic
pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek
pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew
pyparsing_unicode.日本語 = pyparsing_unicode.Japanese
pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji
pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana
pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana
pyparsing_unicode.한국어 = pyparsing_unicode.Korean
pyparsing_unicode.ไทย = pyparsing_unicode.Thai
pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari
|
|
@ -0,0 +1,235 @@
|
||||||
|
# util.py
|
||||||
|
import warnings
|
||||||
|
import types
|
||||||
|
import collections
|
||||||
|
import itertools
|
||||||
|
from functools import lru_cache
|
||||||
|
from typing import List, Union, Iterable
|
||||||
|
|
||||||
|
_bslash = chr(92)
|
||||||
|
|
||||||
|
|
||||||
|
class __config_flags:
    """Internal class for defining compatibility and debugging flags"""

    # names of all settable flags / names that may not be changed
    _all_names: List[str] = []
    _fixed_names: List[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        # Fixed flags warn and keep their current value.
        if dname in cls._fixed_names:
            warnings.warn(
                "{}.{} {} is {} and cannot be overridden".format(
                    cls.__name__,
                    dname,
                    cls._type_desc,
                    str(getattr(cls, dname)).upper(),
                )
            )
            return
        # Unknown flags are an error; known ones are simply assigned.
        if dname not in cls._all_names:
            raise ValueError("no such {} {!r}".format(cls._type_desc, dname))
        setattr(cls, dname, value)

    enable = classmethod(lambda cls, name: cls._set(name, True))
    disable = classmethod(lambda cls, name: cls._set(name, False))
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # A location immediately after a newline is column 1 of the next line.
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # Otherwise: distance from the most recent newline (rfind returns -1
    # when there is none, which yields a 1-based column on the first line).
    return loc - strg.rfind("\n", 0, loc)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    # Line number = number of newlines before loc, plus one (1-based).
    return 1 + strg.count("\n", 0, loc)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # Slice from just past the previous newline up to (not including) the
    # next one; no following newline means "through end of string".
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        return strg[start:]
    return strg[start:end]
|
||||||
|
|
||||||
|
|
||||||
|
class _UnboundedCache:
    """Memoizing cache with no size limit; entries are never evicted."""

    def __init__(self):
        store = {}
        store_get = store.get
        # unique sentinel distinguishing "missing" from a cached None
        self.not_in_cache = not_in_cache = object()

        def get(_, key):
            return store_get(key, not_in_cache)

        def set_(_, key, value):
            store[key] = value

        def clear(_):
            store.clear()

        # bind the closures as methods; the dict itself stays private
        self.size = None
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)
|
||||||
|
|
||||||
|
|
||||||
|
class _FifoCache:
    """Memoizing cache that evicts the oldest entries once ``size`` is exceeded."""

    def __init__(self, size):
        # unique sentinel distinguishing "missing" from a cached None
        self.not_in_cache = not_in_cache = object()
        store = collections.OrderedDict()
        store_get = store.get

        def get(_, key):
            return store_get(key, not_in_cache)

        def set_(_, key, value):
            store[key] = value
            # evict in insertion (FIFO) order until back under the limit
            while len(store) > size:
                store.popitem(last=False)

        def clear(_):
            store.clear()

        # bind the closures as methods; the dict itself stays private
        self.size = size
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)
|
||||||
|
|
||||||
|
|
||||||
|
class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    The memo tracks retained items by their access order; once `capacity` items
    are retained, the least recently used item is discarded.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}                       # live entries
        self._memory = collections.OrderedDict()  # retained (deleted) entries, LRU order

    def __getitem__(self, key):
        # Prefer the live entry; otherwise fall back to the retained one,
        # refreshing its recency on a hit (KeyError propagates if absent).
        if key in self._active:
            return self._active[key]
        self._memory.move_to_end(key)
        return self._memory[key]

    def __setitem__(self, key, value):
        # A fresh assignment supersedes any retained copy.
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        try:
            value = self._active.pop(key)
        except KeyError:
            return
        # Retain the deleted value, discarding least-recently-used retained
        # items to stay within capacity.
        while len(self._memory) >= self._capacity:
            self._memory.popitem(last=False)
        self._memory[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()
|
||||||
|
|
||||||
|
|
||||||
|
class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        # Intentionally a no-op: entries are never removed, so every
        # previously stored result stays available.
        return
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_regex_range_chars(s: str) -> str:
    """Escape characters that are special inside a regex character range."""
    # These characters must be backslash-escaped within [...]: \ ^ - [ ]
    for special in r"\^-[]":
        s = s.replace(special, _bslash + special)
    # Replace literal newline/tab with their regex escape sequences.
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return str(s)
|
||||||
|
|
||||||
|
|
||||||
|
def _collapse_string_to_ranges(
|
||||||
|
s: Union[str, Iterable[str]], re_escape: bool = True
|
||||||
|
) -> str:
|
||||||
|
def is_consecutive(c):
|
||||||
|
c_int = ord(c)
|
||||||
|
is_consecutive.prev, prev = c_int, is_consecutive.prev
|
||||||
|
if c_int - prev > 1:
|
||||||
|
is_consecutive.value = next(is_consecutive.counter)
|
||||||
|
return is_consecutive.value
|
||||||
|
|
||||||
|
is_consecutive.prev = 0
|
||||||
|
is_consecutive.counter = itertools.count()
|
||||||
|
is_consecutive.value = -1
|
||||||
|
|
||||||
|
def escape_re_range_char(c):
|
||||||
|
return "\\" + c if c in r"\^-][" else c
|
||||||
|
|
||||||
|
def no_escape_re_range_char(c):
|
||||||
|
return c
|
||||||
|
|
||||||
|
if not re_escape:
|
||||||
|
escape_re_range_char = no_escape_re_range_char
|
||||||
|
|
||||||
|
ret = []
|
||||||
|
s = "".join(sorted(set(s)))
|
||||||
|
if len(s) > 3:
|
||||||
|
for _, chars in itertools.groupby(s, key=is_consecutive):
|
||||||
|
first = last = next(chars)
|
||||||
|
last = collections.deque(
|
||||||
|
itertools.chain(iter([last]), chars), maxlen=1
|
||||||
|
).pop()
|
||||||
|
if first == last:
|
||||||
|
ret.append(escape_re_range_char(first))
|
||||||
|
else:
|
||||||
|
sep = "" if ord(last) == ord(first) + 1 else "-"
|
||||||
|
ret.append(
|
||||||
|
"{}{}{}".format(
|
||||||
|
escape_re_range_char(first), sep, escape_re_range_char(last)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
ret = [escape_re_range_char(c) for c in s]
|
||||||
|
|
||||||
|
return "".join(ret)
|
||||||
|
|
||||||
|
|
||||||
|
def _flatten(ll: list) -> list:
|
||||||
|
ret = []
|
||||||
|
for i in ll:
|
||||||
|
if isinstance(i, list):
|
||||||
|
ret.extend(_flatten(i))
|
||||||
|
else:
|
||||||
|
ret.append(i)
|
||||||
|
return ret
|
|
@ -8,7 +8,7 @@ packaging==21.3
|
||||||
pep517==0.12.0
|
pep517==0.12.0
|
||||||
platformdirs==2.4.1
|
platformdirs==2.4.1
|
||||||
progress==1.6
|
progress==1.6
|
||||||
pyparsing==2.4.7
|
pyparsing==3.0.7
|
||||||
requests==2.27.1
|
requests==2.27.1
|
||||||
certifi==2021.05.30
|
certifi==2021.05.30
|
||||||
chardet==4.0.0
|
chardet==4.0.0
|
||||||
|
|
Loading…
Reference in New Issue