mirror of https://github.com/pypa/pip
Upgrade pyparsing to 3.0.7
This commit is contained in:
parent
5c565fc786
commit
5b14995b85
|
@ -0,0 +1 @@
|
|||
Upgrade pyparsing to 3.0.7
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,328 @@
|
|||
# module pyparsing.py
|
||||
#
|
||||
# Copyright (c) 2003-2021 Paul T. McGuire
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
__doc__ = """
|
||||
pyparsing module - Classes and methods to define and execute parsing grammars
|
||||
=============================================================================
|
||||
|
||||
The pyparsing module is an alternative approach to creating and
|
||||
executing simple grammars, vs. the traditional lex/yacc approach, or the
|
||||
use of regular expressions. With pyparsing, you don't need to learn
|
||||
a new syntax for defining grammars or matching expressions - the parsing
|
||||
module provides a library of classes that you use to construct the
|
||||
grammar directly in Python.
|
||||
|
||||
Here is a program to parse "Hello, World!" (or any greeting of the form
|
||||
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
|
||||
:class:`Literal`, and :class:`And` elements
|
||||
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
|
||||
and the strings are auto-converted to :class:`Literal` expressions)::
|
||||
|
||||
from pip._vendor.pyparsing import Word, alphas
|
||||
|
||||
# define grammar of a greeting
|
||||
greet = Word(alphas) + "," + Word(alphas) + "!"
|
||||
|
||||
hello = "Hello, World!"
|
||||
print(hello, "->", greet.parse_string(hello))
|
||||
|
||||
The program outputs the following::
|
||||
|
||||
Hello, World! -> ['Hello', ',', 'World', '!']
|
||||
|
||||
The Python representation of the grammar is quite readable, owing to the
|
||||
self-explanatory class names, and the use of :class:`'+'<And>`,
|
||||
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
|
||||
|
||||
The :class:`ParseResults` object returned from
|
||||
:class:`ParserElement.parse_string` can be
|
||||
accessed as a nested list, a dictionary, or an object with named
|
||||
attributes.
|
||||
|
||||
The pyparsing module handles some of the problems that are typically
|
||||
vexing when writing text parsers:
|
||||
|
||||
- extra or missing whitespace (the above program will also handle
|
||||
"Hello,World!", "Hello , World !", etc.)
|
||||
- quoted strings
|
||||
- embedded comments
|
||||
|
||||
|
||||
Getting Started -
|
||||
-----------------
|
||||
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
|
||||
see the base classes that most other pyparsing
|
||||
classes inherit from. Use the docstrings for examples of how to:
|
||||
|
||||
- construct literal match expressions from :class:`Literal` and
|
||||
:class:`CaselessLiteral` classes
|
||||
- construct character word-group expressions using the :class:`Word`
|
||||
class
|
||||
- see how to create repetitive expressions using :class:`ZeroOrMore`
|
||||
and :class:`OneOrMore` classes
|
||||
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
|
||||
and :class:`'&'<Each>` operators to combine simple expressions into
|
||||
more complex ones
|
||||
- associate names with your parsed results using
|
||||
:class:`ParserElement.setResultsName`
|
||||
- access the parsed data, which is returned as a :class:`ParseResults`
|
||||
object
|
||||
- find some helpful expression short-cuts like :class:`delimitedList`
|
||||
and :class:`oneOf`
|
||||
- find more useful common expressions in the :class:`pyparsing_common`
|
||||
namespace class
|
||||
"""
|
||||
from typing import NamedTuple
|
||||
|
||||
|
||||
class version_info(NamedTuple):
    """Structured version record with the fields major, minor, micro,
    releaselevel and serial."""

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self):
        """Dotted version string, e.g. ``"3.0.7"`` for a final release or
        ``"3.1.0rc2"`` / ``"3.1.0b1"`` for pre-releases."""
        dotted = "{}.{}.{}".format(self.major, self.minor, self.micro)
        # Looked up unconditionally so an empty releaselevel fails the same
        # way regardless of which branch is taken below.
        level_code = self.releaselevel[0]
        if self.releaselevel == "final":
            return dotted
        # "candidate" is rendered as "rc"; other levels use their first letter.
        prefix = "r" if level_code == "c" else ""
        return dotted + "{}{}{}".format(prefix, level_code, self.serial)

    def __str__(self):
        """Module name, version string and release timestamp."""
        return "{} {} / {}".format(__name__, self.__version__, __version_time__)

    def __repr__(self):
        """Module-qualified constructor-style representation."""
        field_text = ", ".join(
            "{}={!r}".format(name, value) for name, value in zip(self._fields, self)
        )
        return "{}.{}({})".format(__name__, type(self).__name__, field_text)
|
||||
|
||||
|
||||
# Release identification for this vendored copy; ``__version__`` is derived
# from ``__version_info__`` rather than being maintained separately.
__version_info__ = version_info(3, 0, 7, "final", 0)
__version_time__ = "15 Jan 2022 04:10 UTC"
__version__ = __version_info__.__version__
# pre-PEP8 spelling of __version_time__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
|
||||
|
||||
from .util import *
|
||||
from .exceptions import *
|
||||
from .actions import *
|
||||
from .core import __diag__, __compat__
|
||||
from .results import *
|
||||
from .core import *
|
||||
from .core import _builtin_exprs as core_builtin_exprs
|
||||
from .helpers import *
|
||||
from .helpers import _builtin_exprs as helper_builtin_exprs
|
||||
|
||||
from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
|
||||
from .testing import pyparsing_test as testing
|
||||
from .common import (
|
||||
pyparsing_common as common,
|
||||
_builtin_exprs as common_builtin_exprs,
|
||||
)
|
||||
|
||||
# define backward compat synonyms: the namespace classes are imported above
# under short names (unicode, common, testing); publish them under their
# original long names as well, unless a star-import already provided one.
if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode
if "pyparsing_common" not in globals():
    pyparsing_common = common
if "pyparsing_test" not in globals():
    pyparsing_test = testing

# Fold the expressions collected by the common and helpers modules into the
# list collected by core (extended in place).
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
|
||||
|
||||
|
||||
# Public API of the package. Each name appears exactly once; "indentedBlock"
# and "locatedExpr" have no snake_case counterpart and are listed with the
# PEP8-era names only (they were previously repeated in the compatibility
# section as well).
__all__ = [
    "__version__",
    "__version_time__",
    "__author__",
    "__compat__",
    "__diag__",
    "And",
    "AtLineStart",
    "AtStringStart",
    "CaselessKeyword",
    "CaselessLiteral",
    "CharsNotIn",
    "Combine",
    "Dict",
    "Each",
    "Empty",
    "FollowedBy",
    "Forward",
    "GoToColumn",
    "Group",
    "IndentedBlock",
    "Keyword",
    "LineEnd",
    "LineStart",
    "Literal",
    "Located",
    "PrecededBy",
    "MatchFirst",
    "NoMatch",
    "NotAny",
    "OneOrMore",
    "OnlyOnce",
    "OpAssoc",
    "Opt",
    "Optional",
    "Or",
    "ParseBaseException",
    "ParseElementEnhance",
    "ParseException",
    "ParseExpression",
    "ParseFatalException",
    "ParseResults",
    "ParseSyntaxException",
    "ParserElement",
    "PositionToken",
    "QuotedString",
    "RecursiveGrammarException",
    "Regex",
    "SkipTo",
    "StringEnd",
    "StringStart",
    "Suppress",
    "Token",
    "TokenConverter",
    "White",
    "Word",
    "WordEnd",
    "WordStart",
    "ZeroOrMore",
    "Char",
    "alphanums",
    "alphas",
    "alphas8bit",
    "any_close_tag",
    "any_open_tag",
    "c_style_comment",
    "col",
    "common_html_entity",
    "counted_array",
    "cpp_style_comment",
    "dbl_quoted_string",
    "dbl_slash_comment",
    "delimited_list",
    "dict_of",
    "empty",
    "hexnums",
    "html_comment",
    "identchars",
    "identbodychars",
    "java_style_comment",
    "line",
    "line_end",
    "line_start",
    "lineno",
    "make_html_tags",
    "make_xml_tags",
    "match_only_at_col",
    "match_previous_expr",
    "match_previous_literal",
    "nested_expr",
    "null_debug_action",
    "nums",
    "one_of",
    "printables",
    "punc8bit",
    "python_style_comment",
    "quoted_string",
    "remove_quotes",
    "replace_with",
    "replace_html_entity",
    "rest_of_line",
    "sgl_quoted_string",
    "srange",
    "string_end",
    "string_start",
    "trace_parse_action",
    "unicode_string",
    "with_attribute",
    "indentedBlock",
    "original_text_for",
    "ungroup",
    "infix_notation",
    "locatedExpr",
    "with_class",
    "CloseMatch",
    "token_map",
    "pyparsing_common",
    "pyparsing_unicode",
    "unicode_set",
    "condition_as_parse_action",
    "pyparsing_test",
    # pre-PEP8 compatibility names
    "__versionTime__",
    "anyCloseTag",
    "anyOpenTag",
    "cStyleComment",
    "commonHTMLEntity",
    "countedArray",
    "cppStyleComment",
    "dblQuotedString",
    "dblSlashComment",
    "delimitedList",
    "dictOf",
    "htmlComment",
    "javaStyleComment",
    "lineEnd",
    "lineStart",
    "makeHTMLTags",
    "makeXMLTags",
    "matchOnlyAtCol",
    "matchPreviousExpr",
    "matchPreviousLiteral",
    "nestedExpr",
    "nullDebugAction",
    "oneOf",
    "opAssoc",
    "pythonStyleComment",
    "quotedString",
    "removeQuotes",
    "replaceHTMLEntity",
    "replaceWith",
    "restOfLine",
    "sglQuotedString",
    "stringEnd",
    "stringStart",
    "traceParseAction",
    "unicodeString",
    "withAttribute",
    "originalTextFor",
    "infixNotation",
    "withClass",
    "tokenMap",
    "conditionAsParseAction",
    "autoname_elements",
]
|
|
@ -0,0 +1,207 @@
|
|||
# actions.py
|
||||
|
||||
from .exceptions import ParseException
|
||||
from .util import col
|
||||
|
||||
|
||||
class OnlyOnce:
    """
    Wrapper for parse actions, to ensure they are only called once.

    After the wrapped action has run successfully, further calls raise
    :class:`ParseException` until :meth:`reset` is called.
    """

    def __init__(self, method_call):
        # imported here rather than at module level (core imports this module)
        from .core import _trim_arity

        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
        results = self.callable(s, l, t)
        # only marked as used once the wrapped action has returned normally
        self.called = True
        return results

    def reset(self):
        """
        Allow the associated parse action to be called once more.
        """
        self.called = False
|
||||
|
||||
|
||||
def match_only_at_col(n):
    """
    Build a parse action that accepts a match only when it begins at
    column ``n`` of the input text, and raises :class:`ParseException`
    otherwise.
    """

    def verify_col(strg, locn, toks):
        found_col = col(locn, strg)
        if found_col != n:
            message = "matched token not at column {}".format(n)
            raise ParseException(strg, locn, message)

    return verify_col
|
||||
|
||||
|
||||
def replace_with(repl_str):
    """
    Build a parse action that ignores the matched tokens and returns
    ``[repl_str]`` instead. Especially useful when used with
    :class:`transform_string<ParserElement.transform_string>` ().

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        OneOrMore(term).parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # a new single-element list is produced on every invocation
    return lambda s, l, t: [repl_str]
|
||||
|
||||
|
||||
def remove_quotes(s, l, t):
    """
    Parse action that drops the first and last character of the first
    token, i.e. the quotation marks of a parsed quoted string.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
|
||||
|
||||
|
||||
def with_attribute(*args, **attr_dict):
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``with_attribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Attribute names that are not valid Python identifiers (such as names
    with a namespace prefix) cannot be given as plain keyword arguments;
    use the explicit dict or the tuple form for those. When positional
    name-value tuples are given, keyword arguments are ignored.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action raises :class:`ParseException` when a
    required attribute is missing or has a different value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional name-value tuples take precedence over keyword arguments
    pairs = args if args else attr_dict.items()
    # unpacking here rejects anything that is not a 2-item pair up front
    required = [(name, value) for name, value in pairs]

    def pa(s, l, tokens):
        for attr_name, expected in required:
            if attr_name not in tokens:
                raise ParseException(s, l, "no matching attribute " + attr_name)
            if expected != with_attribute.ANY_VALUE:
                actual = tokens[attr_name]
                if actual != expected:
                    raise ParseException(
                        s,
                        l,
                        "attribute {!r} has value {!r}, must be {!r}".format(
                            attr_name, actual, expected
                        ),
                    )

    return pa


# sentinel meaning "attribute must be present, any value is acceptable"
with_attribute.ANY_VALUE = object()
|
||||
|
||||
|
||||
def with_class(classname, namespace=""):
    """
    Simplified version of :class:`with_attribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    The attribute tested is ``class``, or ``<namespace>:class`` when a
    non-empty ``namespace`` is given.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "{}:class".format(namespace)
    else:
        classattr = "class"
    # "class" cannot be written as a keyword argument, hence the ** dict
    return with_attribute(**{classattr: classname})
|
||||
|
||||
|
||||
# pre-PEP8 compatibility symbols: camelCase aliases bound to the same
# function objects as the snake_case names defined above
replaceWith = replace_with
removeQuotes = remove_quotes
withAttribute = with_attribute
withClass = with_class
matchOnlyAtCol = match_only_at_col
|
|
@ -0,0 +1,424 @@
|
|||
# common.py
|
||||
from .core import *
|
||||
from .helpers import delimited_list, any_open_tag, any_close_tag
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
# some other useful expressions - using lower-case class name since we are really using this as a namespace
|
||||
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Each parse action is also available under its pre-PEP8 camelCase name
    (``convertToInteger`` etc.); see the aliases at the end of the class.

    Example::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convert_to_integer = token_map(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convert_to_float = token_map(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(convert_to_integer)
    )
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() makes copies, so replacing the parse action here does
    # not affect signed_integer itself
    fraction = (
        signed_integer().set_parse_action(convert_to_float)
        + "/"
        + signed_integer().set_parse_action(convert_to_float)
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tokens are [numerator, "/", denominator]
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the separating "-" is suppressed, so summing adds integer and fraction
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(convert_to_float)
    )
    """expression that parses a floating point number and returns a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(convert_to_float)
    )
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(convert_to_float)
    )
    """any int or real number, returned as float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # the "::" form must stand in for at least one group, so fewer than 8
    # explicit hex groups may be present
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and must be repeated (\1) throughout
    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Returns a parse action taking ``(s, loc, toks)`` that converts
        ``toks[0]``; it raises :class:`ParseException` when the text does
        not match ``fmt``.

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                # report as a parse failure, keeping the strptime error as cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Returns a parse action taking ``(s, loc, toks)`` that converts
        ``toks[0]``; it raises :class:`ParseException` when the text does
        not match ``fmt``.

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt)
            except ValueError as ve:
                # report as a parse failure, keeping the strptime error as cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Returns the first token with every opening and closing tag removed.

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = delimited_list(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
    """Parse action to convert tokens to upper case."""

    downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
    """Parse action to convert tokens to lower case."""

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"^" +
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
        r"(?P<host>" +
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
        r"|" +
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:" +
        r"(?:" +
        r"[a-z0-9\u00a1-\uffff]" +
        r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
        r")?" +
        r"[a-z0-9\u00a1-\uffff]\." +
        r")+" +
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
        r")" +
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?" +
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?" +
        # query string (optional)
        r"(\?(?P<query>[^#]*))?" +
        # fragment (optional)
        r"(#(?P<fragment>\S*))?" +
        r"$"
    ).set_name("url")
    # fmt: on

    # pre-PEP8 compatibility names
    convertToInteger = convert_to_integer
    convertToFloat = convert_to_float
    convertToDate = convert_to_date
    convertToDatetime = convert_to_datetime
    stripHTMLTags = strip_html_tags
    upcaseTokens = upcase_tokens
    downcaseTokens = downcase_tokens
|
||||
|
||||
|
||||
# Every ParserElement defined as an attribute of pyparsing_common, public
# or private.
_builtin_exprs = [
    member
    for member in vars(pyparsing_common).values()
    if isinstance(member, ParserElement)
]
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,593 @@
|
|||
import railroad
|
||||
from pip._vendor import pyparsing
|
||||
from pip._vendor.pkg_resources import resource_filename
|
||||
from typing import (
|
||||
List,
|
||||
Optional,
|
||||
NamedTuple,
|
||||
Generic,
|
||||
TypeVar,
|
||||
Dict,
|
||||
Callable,
|
||||
Set,
|
||||
Iterable,
|
||||
)
|
||||
from jinja2 import Template
|
||||
from io import StringIO
|
||||
import inspect
|
||||
|
||||
# Read the Jinja2 template stored next to this module once, at import time.
with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
    template = Template(fp.read())

# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
NamedDiagram = NamedTuple(
    "NamedDiagram",
    [("name", str), ("diagram", Optional[railroad.DiagramItem]), ("index", int)],
)
"""
A simple structure for associating a name with a railroad diagram
"""

# generic result type used by EditablePartial below
T = TypeVar("T")
|
||||
|
||||
|
||||
class EachItem(railroad.Group):
    """
    Custom railroad item to compose a:
    - Group containing a
      - OneOrMore containing a
        - Choice of the elements in the Each
    with the group label indicating that all must be matched
    """

    all_label = "[ALL]"

    def __init__(self, *items):
        # len(items) - 1 is passed as Choice's first positional argument
        # (presumably the index of the default branch - confirm against the
        # railroad-diagrams API)
        alternatives = railroad.Choice(len(items) - 1, *items)
        repeated = railroad.OneOrMore(item=alternatives)
        super().__init__(repeated, label=self.all_label)
|
||||
|
||||
|
||||
class AnnotatedItem(railroad.Group):
    """
    Group subclass that shows its item under an annotation label wrapped in square brackets
    """

    def __init__(self, label: str, item):
        bracketed_label = f"[{label}]"
        super().__init__(item=item, label=bracketed_label)
|
||||
|
||||
|
||||
class EditablePartial(Generic[T]):
    """
    A deferred call in the spirit of functools.partial, except that the stored function and arguments stay
    editable until the instance is called. It stands in for an object that has not been constructed yet.
    """

    # Deferral is required because the railroad constructors transform their arguments, so they cannot be
    # called until the whole tree has been assembled

    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    @classmethod
    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
        """
        Build an EditablePartial by writing the call exactly as it would be made directly: the function first,
        then its arguments. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
        """
        positional = list(args)
        return EditablePartial(func=func, args=positional, kwargs=kwargs)

    @property
    def name(self):
        """The value stored under the ``name`` keyword argument"""
        return self.kwargs["name"]

    def __call__(self) -> T:
        """
        Call the stored function with copies of the stored arguments and return the result
        """
        call_args = self.args.copy()
        call_kwargs = self.kwargs.copy()

        # Convenience hack: a function's varargs parameter (e.g. *args) may have been supplied as a keyword
        # argument (e.g. args=['list', 'of', 'things']); if so, move those values onto the positional arguments
        varargs_name = inspect.getfullargspec(self.func).varargs
        if varargs_name in self.kwargs:
            call_args += call_kwargs.pop(varargs_name)

        return self.func(*call_args, **call_kwargs)
|
||||
|
||||
|
||||
def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
    """
    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams

    Entries whose ``diagram`` is None have nothing to draw and are left out of the output.

    :param diagrams: the named diagrams to render, in the order they should appear
    :param kwargs: kwargs to be passed in to the template
    :returns: the rendered HTML document
    """
    data = []
    for diagram in diagrams:
        # NamedDiagram.diagram is declared Optional: skip entries with no railroad item instead of
        # failing with an AttributeError on None
        if diagram.diagram is None:
            continue
        svg_buffer = StringIO()
        diagram.diagram.writeSvg(svg_buffer.write)
        title = diagram.name
        if diagram.index == 0:
            title += " (root)"
        data.append({"title": title, "text": "", "svg": svg_buffer.getvalue()})

    return template.render(diagrams=data, **kwargs)
|
||||
|
||||
|
||||
def resolve_partial(partial: "EditablePartial[T]") -> T:
    """
    Recursively evaluate EditablePartials, descending into lists and dict values, and return the
    constructed result. Any other kind of value is returned unchanged.
    """
    if isinstance(partial, EditablePartial):
        # resolve the stored arguments first (updating the partial in place), then make the call
        partial.args = resolve_partial(partial.args)
        partial.kwargs = resolve_partial(partial.kwargs)
        return partial()

    if isinstance(partial, list):
        return [resolve_partial(entry) for entry in partial]

    if isinstance(partial, dict):
        return {key: resolve_partial(value) for key, value in partial.items()}

    return partial
|
||||
|
||||
|
||||
def to_railroad(
    element: pyparsing.ParserElement,
    diagram_kwargs: Optional[dict] = None,
    vertical: int = 3,
    show_results_names: bool = False,
) -> List[NamedDiagram]:
    """
    Convert a pyparsing element tree into a list of diagrams, sorted by diagram index. Use this entrypoint
    when you want access to the Railroad tree before it is converted to HTML

    :param element: base element of the parser being diagrammed
    :param diagram_kwargs: kwargs to pass to the Diagram() constructor
    :param vertical: (optional) - int - limit at which number of alternatives should be
       shown vertically instead of horizontally
    :param show_results_names: bool to indicate whether results name annotations should be
       included in the diagram
    """
    # Walk the whole tree underneath the root, recording conversion state as we go
    state = ConverterState(diagram_kwargs=diagram_kwargs or {})
    _to_diagram_element(
        element,
        lookup=state,
        parent=None,
        vertical=vertical,
        show_results_names=show_results_names,
    )

    # If the root is still tracked as element state, it has not become a diagram yet: force it
    root_id = id(element)
    if root_id in state:
        root_state = state[root_id]
        if not element.customName:
            root_state.name = ""
        root_state.mark_for_extraction(root_id, state, force=True)

    # The walk is finished, so the intermediate structures can now be turned into Railroad elements
    pending = list(state.diagrams.values())
    if len(pending) <= 1:
        # special case - a lone diagram is always displayed, even if it has no name
        selected = pending
    else:
        # keep only the first diagram seen for each name
        used_names = set()
        selected = []
        for candidate in pending:
            candidate_name = candidate.name
            # SkipTo elements are uninformative as subdiagrams, so leave them out
            if candidate_name == "...":
                continue
            if candidate_name is None or candidate_name in used_names:
                continue
            used_names.add(candidate_name)
            selected.append(candidate)

    resolved = [resolve_partial(partial) for partial in selected]
    return sorted(resolved, key=lambda diag: diag.index)
|
||||
|
||||
|
||||
def _should_vertical(
    specification: int, exprs: Iterable[pyparsing.ParserElement]
) -> bool:
    """
    Decide whether a list of elements should be laid out vertically.

    Returns False when ``specification`` is None; otherwise returns whether the number of visible
    expressions in ``exprs`` has reached ``specification``.
    """
    if specification is not None:
        return len(_visible_exprs(exprs)) >= specification
    return False
|
||||
|
||||
|
||||
class ElementState:
    """
    Bookkeeping kept for a single pyparsing element while it is being converted
    """

    # Note: this should be a dataclass, but we have to support Python 3.5
    def __init__(
        self,
        element: pyparsing.ParserElement,
        converted: EditablePartial,
        parent: EditablePartial,
        number: int,
        name: str = None,
        parent_index: Optional[int] = None,
    ):
        #: The pyparsing element this state describes
        self.element: pyparsing.ParserElement = element
        #: The element's name
        self.name: str = name
        #: The Railroad element produced for it, still unconstructed
        self.converted: EditablePartial = converted
        #: The parent Railroad element, kept so this element can be swapped out if it gets extracted
        self.parent: EditablePartial = parent
        #: Discovery order of this element; becomes the diagram index if it is extracted
        self.number: int = number
        #: Position of this element within its parent
        self.parent_index: Optional[int] = parent_index
        #: True once this element should be extracted into a subdiagram
        self.extract: bool = False
        #: True once all of this element's children have been filled out
        self.complete: bool = False

    def mark_for_extraction(
        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
    ):
        """
        Flag this element for eventual extraction into a sub-diagram; called once it has been seen twice
        :param el_id: id of the element
        :param state: element/diagram state tracker
        :param name: name to use for this element's text
        :param force: If true, extract immediately whatever the state of this element. Only useful for the
            root element, when we know the conversion is finished
        """
        self.extract = True

        # Only pick a name if there is none yet: an explicitly given name wins, then the element's
        # custom name, and failing both the name is the empty string
        if not self.name:
            self.name = name or self.element.customName or ""

        # Being marked does not mean extraction can happen yet - the children may still have to be added.
        # Elements that are not worth a diagram of their own (a plain string literal etc) are not extracted here
        if not force:
            if not self.complete or not _worth_extracting(self.element):
                return
        state.extract_into_diagram(el_id)
|
||||
|
||||
|
||||
class ConverterState:
    """
    State shared by every level of the recursive walk over the element tree
    """

    def __init__(self, diagram_kwargs: Optional[dict] = None):
        #: Maps ParserElement ids to the state recorded for them
        self._element_diagram_states: Dict[int, ElementState] = {}
        #: Maps ParserElement ids to the subdiagrams generated from them
        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
        #: Counter behind generate_unnamed()
        self.unnamed_index: int = 1
        #: Counter behind generate_index(); the generated numbers are used for sorting
        self.index: int = 0
        #: Keyword arguments shared by every diagram that gets constructed
        self.diagram_kwargs: dict = diagram_kwargs or {}
        self.extracted_diagram_names: Set[str] = set()

    # Item access on the state object reads and writes the per-element states

    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def generate_unnamed(self) -> int:
        """
        Advance the unnamed-diagram counter and return its new value
        """
        bumped = self.unnamed_index + 1
        self.unnamed_index = bumped
        return bumped

    def generate_index(self) -> int:
        """
        Advance the diagram index counter and return its new value
        """
        bumped = self.index + 1
        self.index = bumped
        return bumped

    def extract_into_diagram(self, el_id: int):
        """
        Move the element with id ``el_id`` into a subdiagram of its own. Used when the same token is
        met twice in one tree: the token's slot in its parent is replaced with a NonTerminal carrying
        its name, and a new named diagram is recorded for the token itself
        """
        element_state = self[el_id]
        parent = element_state.parent

        # Swap the element's definition inside its parent for a reference by name
        if parent:
            reference = EditablePartial.from_call(
                railroad.NonTerminal, text=element_state.name
            )
            parent_kwargs = parent.kwargs
            if "item" in parent_kwargs:
                parent_kwargs["item"] = reference
            elif "items" in parent_kwargs:
                parent_kwargs["items"][element_state.parent_index] = reference

        # For a Group, diagram its content directly; the name is kept on the diagram itself
        converted = element_state.converted
        content = converted.kwargs["item"] if converted.func == railroad.Group else converted

        diagram_partial = EditablePartial.from_call(
            railroad.Diagram, content, **self.diagram_kwargs
        )
        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=element_state.name,
            diagram=diagram_partial,
            index=element_state.number,
        )

        # The element is now tracked as a diagram rather than as pending element state
        del self[el_id]
|
||||
|
||||
|
||||
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
    """
    Tell whether ``element`` deserves a sub-diagram of its own: it does when at least one of its
    children has children itself
    """
    for child in element.recurse():
        if child.recurse():
            return True
    return False
|
||||
|
||||
|
||||
def _apply_diagram_item_enhancements(fn):
    """
    decorator to ensure enhancements to a diagram item (such as results name annotations)
    get applied on return from _to_diagram_element (we do this since there are several
    returns in _to_diagram_element)

    :param fn: the conversion function to wrap; it is called with the wrapper's seven arguments, positionally
    :returns: a wrapper with the same parameters as ``fn`` that keeps its name and docstring
    """
    import functools

    # functools.wraps keeps the wrapped function's __name__/__doc__ visible on the wrapper
    @functools.wraps(fn)
    def _inner(
        element: pyparsing.ParserElement,
        parent: Optional[EditablePartial],
        lookup: ConverterState = None,
        vertical: int = None,
        index: int = 0,
        name_hint: str = None,
        show_results_names: bool = False,
    ) -> Optional[EditablePartial]:

        ret = fn(
            element,
            parent,
            lookup,
            vertical,
            index,
            name_hint,
            show_results_names,
        )

        # apply annotation for results name, if present
        if show_results_names and ret is not None:
            element_results_name = element.resultsName
            if element_results_name:
                # add "*" to indicate if this is a "list all results" name
                element_results_name += "" if element.modalResults else "*"
                # wrap the converted item in a Group labelled with the results name
                ret = EditablePartial.from_call(
                    railroad.Group, item=ret, label=element_results_name
                )

        return ret

    return _inner
|
||||
|
||||
|
||||
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Return the expressions in ``exprs`` that have neither a custom name nor a results name and are
    not an instance of one of the non-diagramming expression types
    """
    hidden_types = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )
    visible = []
    for expr in exprs:
        if expr.customName or expr.resultsName or isinstance(expr, hidden_types):
            continue
        visible.append(expr)
    return visible
|
||||
|
||||
|
||||
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: Optional[EditablePartial],
    lookup: ConverterState = None,
    vertical: int = None,
    index: int = 0,
    name_hint: str = None,
    show_results_names: bool = False,
) -> Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element
    :param element: The pyparsing element to convert
    :param lookup: The shared converter state that keeps track of useful things
    :param index: The index of this element within the parent
    :param parent: The parent of this element in the output tree
    :param vertical: Threshold handed to _should_vertical: a list of elements is drawn vertically once its number
        of visible expressions reaches this value. If None (the default here), lists are never drawn vertically
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names
    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed,
        or None if the element contributes nothing to the diagram
    """
    exprs = element.recurse()
    # Name precedence: explicit hint, then the element's custom name, then its class name
    name = name_hint or element.customName or element.__class__.__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                pyparsing.TokenConverter,
                # pyparsing.Forward,
                pyparsing.Located,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                # Convert the wrapped expression in this wrapper's place (same parent and index)
                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                )

    # If the element isn't worth extracting, we always treat it as the first time we say it
    if _worth_extracting(element):
        if el_id in lookup:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created
    # (single-child containers use ``item=""`` or ``item=None`` as the placeholder instead)
    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len(set((e.name, e.resultsName) for e in exprs)) == 1:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        ret = EditablePartial.from_call(railroad.OneOrMore, item="")
    elif isinstance(element, pyparsing.ZeroOrMore):
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Group):
        ret = EditablePartial.from_call(
            railroad.Group, item=None, label=element_results_name
        )
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif len(exprs) > 1:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    else:
        # Everything else is drawn as a Terminal showing the element's default name
        terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
        ret = terminal

    if ret is None:
        return

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    if element.customName:
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    # ``i`` is the slot the next child will occupy in ``items``; it only advances when a child is kept
    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all this items children are none, skip this item
    # (it is replaced by a plain Terminal that shows the element's name)
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    # An element that was marked for extraction while its children were still being added can be
    # extracted now; the caller then receives a NonTerminal that refers to the new sub-diagram
    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )

    return ret
|
|
@ -0,0 +1,26 @@
|
|||
<!DOCTYPE html>
<html>
<head>
    {# Use the caller-supplied "head" markup when given; otherwise fall back to the default style #}
    {% if not head %}
        <style type="text/css">
            .railroad-heading {
                font-family: monospace;
            }
        </style>
    {% else %}
        {{ head | safe }}
    {% endif %}
</head>
<body>
{{ body | safe }}
{# One group per diagram: heading, description text, then the SVG markup #}
{% for diagram in diagrams %}
    <div class="railroad-group">
        <h1 class="railroad-heading">{{ diagram.title }}</h1>
        <div class="railroad-description">{{ diagram.text }}</div>
        <div class="railroad-svg">
            {{ diagram.svg }}
        </div>
    </div>
{% endfor %}
</body>
</html>
|
|
@ -0,0 +1,267 @@
|
|||
# exceptions.py
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
from .util import col, line, lineno, _collapse_string_to_ranges
|
||||
from .unicode import pyparsing_unicode as ppu
|
||||
|
||||
|
||||
class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic):
    # Combination of the Latin1, LatinA, LatinB, Greek and Cyrillic sets; its ``alphanums``
    # feed the regular expression used to pull a word out of the input for error messages
    pass
|
||||
|
||||
|
||||
# Alphanumerics of ExceptionWordUnicode, collapsed into a string that is placed inside [...] below
_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums)
# Matches a run of 1 to 16 of those characters, or failing that whatever single character "." matches;
# ParseBaseException.__str__ uses it to report what was found at the error location
_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.")
|
||||
|
||||
|
||||
class ParseBaseException(Exception):
    """common base class for the exceptions raised at parsing runtime"""

    # Performance tuning: a *lot* of these get constructed, so this constructor
    # is kept as small and fast as possible
    def __init__(
        self,
        pstr: str,
        loc: int = 0,
        msg: Optional[str] = None,
        elem=None,
    ):
        self.loc = loc
        # a lone string argument is the message itself, and there is no input text
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parser_element = elem
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @staticmethod
    def explain_exception(exc, depth=16):
        """
        Translate the Python traceback attached to an exception into a list of the pyparsing
        expressions that led to the exception being raised.

        Parameters:

        - exc - exception raised during parsing (need not be a ParseException, in support
          of Python exceptions that might be raised in a parse action)
        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.
        """
        import inspect
        from .core import ParserElement

        if depth is None:
            depth = sys.getrecursionlimit()

        report = []
        if isinstance(exc, ParseBaseException):
            # the failing input line, then a caret under the failing column
            report.extend((exc.line, " " * (exc.column - 1) + "^"))
        report.append(f"{type(exc).__name__}: {exc}")

        if depth > 0:
            frames = inspect.getinnerframes(exc.__traceback__, context=depth)
            reported_ids = set()
            for frame_info in frames[-depth:]:
                frame = frame_info[0]
                code_name = frame.f_code.co_name
                owner = frame.f_locals.get("self", None)

                if isinstance(owner, ParserElement):
                    # list each parser element once, and only from its parse methods
                    if code_name not in ("parseImpl", "_parseNoCache"):
                        continue
                    if id(owner) in reported_ids:
                        continue
                    reported_ids.add(id(owner))

                    owner_type = type(owner)
                    report.append(
                        f"{owner_type.__module__}.{owner_type.__name__} - {owner}"
                    )

                elif owner is not None:
                    # a method of some other object: list the object's type
                    owner_type = type(owner)
                    report.append(f"{owner_type.__module__}.{owner_type.__name__}")

                else:
                    # a plain function: list its name, leaving out wrappers and module-level code
                    if code_name in ("wrapper", "<module>"):
                        continue
                    report.append(code_name)

                # only frames that added a line count against the depth limit
                depth -= 1
                if not depth:
                    break

        return "\n".join(report)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method that builds one type of ParseException from another,
        so that subclasses do not run into __init__ signature conflicts
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    @property
    def line(self) -> str:
        """
        The line of text in which the exception occurred.
        """
        return line(self.loc, self.pstr)

    @property
    def lineno(self) -> int:
        """
        The 1-based number of the line of text in which the exception occurred.
        """
        return lineno(self.loc, self.pstr)

    @property
    def col(self) -> int:
        """
        The 1-based column, within its line of text, at which the exception occurred.
        """
        return col(self.loc, self.pstr)

    @property
    def column(self) -> int:
        """
        The 1-based column, within its line of text, at which the exception occurred.
        """
        return col(self.loc, self.pstr)

    def __str__(self) -> str:
        if not self.pstr:
            foundstr = ""
        elif self.loc >= len(self.pstr):
            foundstr = ", found end of text"
        else:
            # show the next word at the error location, or a single character if no word matches
            found_match = _exception_word_extractor.match(self.pstr, self.loc)
            found = (
                found_match.group(0)
                if found_match is not None
                else self.pstr[self.loc : self.loc + 1]
            )
            foundstr = (", found %r" % found).replace(r"\\", "\\")
        return "{}{} (at char {}), (line:{}, col:{})".format(
            self.msg, foundstr, self.loc, self.lineno, self.column
        )

    def __repr__(self):
        return str(self)

    def mark_input_line(self, marker_string: str = None, *, markerString=">!<") -> str:
        """
        Return the line of input where the exception occurred, stripped of surrounding
        whitespace, with a marker string inserted at the location of the exception.
        """
        # the positional marker_string, when given, takes precedence over the markerString keyword
        marker = markerString if marker_string is None else marker_string
        text = self.line
        insert_at = self.column - 1
        if marker:
            text = text[:insert_at] + marker + text[insert_at:]
        return text.strip()

    def explain(self, depth=16) -> str:
        """
        Translate the Python traceback attached to this exception into a list of the pyparsing
        expressions that led to the exception being raised.

        Parameters:

        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Example::

            expr = pp.Word(pp.nums) * 3
            try:
                expr.parse_string("123 456 A789")
            except pp.ParseException as pe:
                print(pe.explain(depth=0))

        prints::

            123 456 A789
                    ^
            ParseException: Expected W:(0-9), found 'A' (at char 8), (line:1, col:9)

        Note: the diagnostic output includes string representations of the expressions
        that failed to parse. These are easier to read if you use `set_name` to give your
        expressions identifiable names; otherwise the default string forms are used, which
        may be cryptic.

        Note: pyparsing's default truncation of exception tracebacks may also truncate the
        stack of expressions that are displayed in the ``explain`` output. To get the full listing
        of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True``
        """
        return self.explain_exception(self, depth)

    # pre-PEP8 compatibility name
    markInputline = mark_input_line
|
||||
|
||||
|
||||
class ParseException(ParseBaseException):
    """
    Exception thrown when a parse expression doesn't match the input string

    Example::

        try:
            Word(nums).set_name("integer").parse_string("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.column))

    prints::

        Expected integer, found 'ABC' (at char 0), (line:1, col:1)
        column: 1

    """
|
||||
|
||||
|
||||
class ParseFatalException(ParseBaseException):
    """
    User-throwable exception, to be raised when inconsistent parse content
    is found; stops all parsing immediately
    """
|
||||
|
||||
|
||||
class ParseSyntaxException(ParseFatalException):
    """
    Just like :class:`ParseFatalException`, but thrown internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
    that parsing is to stop immediately because a syntax error has been
    found that cannot be backtracked from.
    """
|
||||
|
||||
|
||||
class RecursiveGrammarException(Exception):
    """
    Raised by :class:`ParserElement.validate` when the grammar could be
    left-recursive; the parser may need to enable left recursion using
    :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
    """

    def __init__(self, parseElementList):
        # the list of parse elements handed in by the caller, shown by __str__
        self.parseElementTrace = parseElementList

    def __str__(self) -> str:
        return f"RecursiveGrammarException: {self.parseElementTrace}"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,760 @@
|
|||
# results.py
|
||||
from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
|
||||
import pprint
|
||||
from weakref import ref as wkref
|
||||
from typing import Tuple, Any
|
||||
|
||||
# The str and bytes types as a tuple - presumably for isinstance() checks on string-like
# values; the uses are outside this part of the file
str_type: Tuple[type, ...] = (str, bytes)
# The type of generator objects, taken from an (empty) generator expression
_generator_type = type((_ for _ in ()))
|
||||
|
||||
|
||||
class _ParseResultsWithOffset:
|
||||
__slots__ = ["tup"]
|
||||
|
||||
def __init__(self, p1, p2):
|
||||
self.tup = (p1, p2)
|
||||
|
||||
def __getitem__(self, i):
|
||||
return self.tup[i]
|
||||
|
||||
def __getstate__(self):
|
||||
return self.tup
|
||||
|
||||
def __setstate__(self, *args):
|
||||
self.tup = args[0]
|
||||
|
||||
|
||||
class ParseResults:
|
||||
"""Structured parse results, to provide multiple means of access to
|
||||
the parsed data:
|
||||
|
||||
- as a list (``len(results)``)
|
||||
- by list index (``results[0], results[1]``, etc.)
|
||||
- by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
|
||||
|
||||
Example::
|
||||
|
||||
integer = Word(nums)
|
||||
date_str = (integer.set_results_name("year") + '/'
|
||||
+ integer.set_results_name("month") + '/'
|
||||
+ integer.set_results_name("day"))
|
||||
# equivalent form:
|
||||
# date_str = (integer("year") + '/'
|
||||
# + integer("month") + '/'
|
||||
# + integer("day"))
|
||||
|
||||
# parse_string returns a ParseResults object
|
||||
result = date_str.parse_string("1999/12/31")
|
||||
|
||||
def test(s, fn=repr):
|
||||
print("{} -> {}".format(s, fn(eval(s))))
|
||||
test("list(result)")
|
||||
test("result[0]")
|
||||
test("result['month']")
|
||||
test("result.day")
|
||||
test("'month' in result")
|
||||
test("'minutes' in result")
|
||||
test("result.dump()", str)
|
||||
|
||||
prints::
|
||||
|
||||
list(result) -> ['1999', '/', '12', '/', '31']
|
||||
result[0] -> '1999'
|
||||
result['month'] -> '12'
|
||||
result.day -> '31'
|
||||
'month' in result -> True
|
||||
'minutes' in result -> False
|
||||
result.dump() -> ['1999', '/', '12', '/', '31']
|
||||
- day: 31
|
||||
- month: 12
|
||||
- year: 1999
|
||||
"""
|
||||
|
||||
# ``toklist`` values for which __init__ skips registering a named result
# (checked there with ``toklist not in self._null_values``).
_null_values: Tuple[Any, ...] = (None, [], "", ())

# Fixed attribute set, so instances carry no per-instance __dict__.
# "__weakref__" is listed so instances can be targets of weak references,
# which is how child results point back at their parent (``_parent``).
__slots__ = [
    "_name",
    "_parent",
    "_all_names",
    "_modal",
    "_toklist",
    "_tokdict",
    "__weakref__",
]
|
||||
|
||||
class List(list):
    """
    Marker subclass of ``list`` for parsed values that must stay real Python
    lists rather than being folded into a :class:`ParseResults`:

        LBRACK, RBRACK = map(pp.Suppress, "[]")
        element = pp.Forward()
        item = ppc.integer
        element_list = LBRACK + pp.delimited_list(element) + RBRACK

        # add parse actions to convert from ParseResults to actual Python collection types
        def as_python_list(t):
            return pp.ParseResults.List(t.as_list())
        element_list.add_parse_action(as_python_list)

        element <<= item | element_list

        element.run_tests('''
            100
            [2,3,4]
            [[2, 1],3,4]
            [(2, 1),3,4]
            (2,3,4)
            ''', post_parse=lambda s, r: (r[0], type(r[0])))

    prints:

        100
        (100, <class 'int'>)

        [2,3,4]
        ([2, 3, 4], <class 'list'>)

        [[2, 1],3,4]
        ([[2, 1], 3, 4], <class 'list'>)

    (Used internally by :class:`Group` when `aslist=True`.)
    """

    def __new__(cls, contained=None):
        # Only validate the argument here; list.__init__ fills in the items.
        source = [] if contained is None else contained
        if isinstance(source, list):
            return list.__new__(cls)
        raise TypeError(
            "{} may only be constructed with a list,"
            " not {}".format(cls.__name__, type(source).__name__)
        )
|
||||
|
||||
def __new__(cls, toklist=None, name=None, **kwargs):
    """
    Allocate a results object and set up its token storage.

    A ``ParseResults`` passed as ``toklist`` is returned as-is instead of
    being wrapped. Otherwise ``_toklist`` becomes: empty for ``None``; a
    one-element list holding a plain-list copy for a ``ParseResults.List``;
    a list built from any other list or generator; or a one-element list
    holding the object itself for anything else.
    """
    if isinstance(toklist, ParseResults):
        return toklist

    inst = object.__new__(cls)
    inst._name = None
    inst._parent = None
    inst._all_names = set()

    if toklist is None:
        tokens = []
    elif not isinstance(toklist, (list, _generator_type)):
        tokens = [toklist]
    elif isinstance(toklist, ParseResults.List):
        tokens = [toklist[:]]
    else:
        tokens = list(toklist)

    inst._toklist = tokens
    inst._tokdict = dict()
    return inst
|
||||
|
||||
# Performance tuning: we construct a *lot* of these, so keep this
|
||||
# constructor as small and fast as possible
|
||||
def __init__(
    self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
):
    """
    Attach an optional results name to the tokens already stored by ``__new__``.

    :param toklist: the token(s) being wrapped; only consulted here when a
        non-empty ``name`` is given
    :param name: results name to register; ``None`` or ``""`` means unnamed,
        and an ``int`` name is converted to ``str``
    :param asList: if true, the named value is stored as a nested
        ``ParseResults``; otherwise the first token is stored, falling back
        to ``toklist`` itself when it cannot be indexed
    :param modal: stored as ``_modal``; when false, ``name`` is recorded in
        ``_all_names``
    :param isinstance: the builtin, bound as a default argument (presumably
        a local-lookup speedup for this hot constructor - confirm)
    """
    self._modal = modal
    if name is not None and name != "":
        if isinstance(name, int):
            name = str(name)
        if not modal:
            # non-modal names accumulate every match instead of the last one
            self._all_names = {name}
        self._name = name
        if toklist not in self._null_values:
            if isinstance(toklist, (str_type, type)):
                # a lone string/bytes/type is one token, not a sequence
                toklist = [toklist]
            if asList:
                if isinstance(toklist, ParseResults):
                    self[name] = _ParseResultsWithOffset(
                        ParseResults(toklist._toklist), 0
                    )
                else:
                    self[name] = _ParseResultsWithOffset(
                        ParseResults(toklist[0]), 0
                    )
                self[name]._name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError, TypeError, IndexError):
                    # toklist could not be indexed: store it whole, unless
                    # that would make this object a named member of itself
                    if toklist is not self:
                        self[name] = toklist
                    else:
                        self._name = name
|
||||
|
||||
def __getitem__(self, i):
    """
    Look up a token by position or a named result by key.

    ``int`` and ``slice`` keys index the token list. Any other key is a
    results name: if it is in ``_all_names`` every stored value is returned
    wrapped in a new ``ParseResults``, otherwise only the most recently
    stored value is returned. Raises ``KeyError`` for an unknown name.
    """
    if isinstance(i, (int, slice)):
        return self._toklist[i]

    collect_all = i in self._all_names
    entries = self._tokdict[i]
    if collect_all:
        return ParseResults([entry[0] for entry in entries])
    return entries[-1][0]
|
||||
|
||||
def __setitem__(self, k, v, isinstance=isinstance):
    """
    Store a token by position or add a named result.

    A ``_ParseResultsWithOffset`` value is appended to the entries for name
    ``k`` as given. An ``int``/``slice`` key assigns into the token list.
    Any other key appends ``v`` as a named result at offset 0. When the
    stored value is a ``ParseResults``, its parent is set to a weak
    reference to this object.
    """
    if isinstance(v, _ParseResultsWithOffset):
        entry, child = v, v[0]
    elif isinstance(k, (int, slice)):
        self._toklist[k] = v
        entry, child = None, v
    else:
        entry, child = _ParseResultsWithOffset(v, 0), v

    if entry is not None:
        # build a new list rather than mutating the existing one in place
        existing = self._tokdict.get(k, list())
        self._tokdict[k] = existing + [entry]

    if isinstance(child, ParseResults):
        child._parent = wkref(self)
|
||||
|
||||
def __delitem__(self, i):
    """
    Delete tokens by position, or a named result by key.

    For ``int``/``slice`` keys the tokens are removed from the token list
    and the stored offset of every named result is decremented once for
    each removed index that lies below it. Any other key removes that
    results name only; the token list is left unchanged.
    """
    if not isinstance(i, (int, slice)):
        del self._tokdict[i]
        return

    size_before = len(self._toklist)
    del self._toklist[i]

    # normalise an int index into the equivalent one-element slice
    if isinstance(i, int):
        if i < 0:
            i += size_before
        i = slice(i, i + 1)

    # removed indices, in the reverse of the order the slice yields them
    removed = list(range(*i.indices(size_before)))[::-1]

    # fix up the offsets recorded for named results
    for occurrences in self._tokdict.values():
        for j in removed:
            for slot, (value, position) in enumerate(occurrences):
                occurrences[slot] = _ParseResultsWithOffset(
                    value, position - (position > j)
                )
|
||||
|
||||
def __contains__(self, k) -> bool:
    """Return whether ``k`` is a defined results name (tokens are not searched)."""
    named_results = self._tokdict
    return k in named_results
|
||||
|
||||
def __len__(self) -> int:
    """Return the number of tokens in the token list."""
    tokens = self._toklist
    return len(tokens)
|
||||
|
||||
def __bool__(self) -> bool:
    """Return True if there is at least one token or one named result."""
    return bool(self._toklist or self._tokdict)
|
||||
|
||||
def __iter__(self) -> Iterator:
    """Iterate over the tokens in order (results names are not included)."""
    tokens = self._toklist
    return iter(tokens)
|
||||
|
||||
def __reversed__(self) -> Iterator:
    """Iterate over a reversed copy of the token list."""
    backwards = self._toklist[::-1]
    return iter(backwards)
|
||||
|
||||
def keys(self):
    """Return an iterator over the defined results names."""
    named_results = self._tokdict
    return iter(named_results)
|
||||
|
||||
def values(self):
    """Return a generator of the value looked up for each results name."""
    return (self[name] for name in self.keys())
|
||||
|
||||
def items(self):
|
||||
return ((k, self[k]) for k in self.keys())
|
||||
|
||||
def haskeys(self) -> bool:
    """
    Report whether any results names are defined. Because ``keys()`` returns
    an iterator (always truthy), use this method to test for the presence
    of named results."""
    return True if self._tokdict else False
|
||||
|
||||
def pop(self, *args, **kwargs):
    """
    Remove an item and return it (by default the last token).

    Works with both ``list`` and ``dict`` semantics. With no argument or an
    integer argument, a token is popped from the list of parsed tokens.
    With a non-integer argument (usually a string), the matching results
    name is popped instead. A second argument (or ``default=`` keyword)
    supplies the value returned when the name is not present, just as in
    ``dict.pop()``.

    Example::

        numlist = Word(nums)[...]
        print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

        def remove_first(tokens):
            tokens.pop(0)
        numlist.add_parse_action(remove_first)
        print(numlist.parse_string("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parse_string("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.add_parse_action(remove_LABEL)
        print(patt.parse_string("AAB 123 321").dump())

    prints::

        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    if not args:
        args = [-1]
    for kw_name, kw_value in kwargs.items():
        if kw_name != "default":
            raise TypeError(
                "pop() got an unexpected keyword argument {!r}".format(kw_name)
            )
        args = (args[0], kw_value)

    target = args[0]
    if not (isinstance(target, int) or len(args) == 1 or target in self):
        # unknown name with a fallback supplied: dict.pop() behaviour
        return args[1]

    removed = self[target]
    del self[target]
    return removed
|
||||
|
||||
def get(self, key, default_value=None):
    """
    Return the named result for ``key``; if no such results name is
    defined, return ``default_value`` (``None`` unless specified).

    Similar to ``dict.get()``.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parse_string("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else default_value
|
||||
|
||||
def insert(self, index, ins_string):
    """
    Insert a new element into the list of parsed tokens at ``index``.

    Similar to ``list.insert()``.

    Example::

        numlist = Word(nums)[...]
        print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        numlist.add_parse_action(insert_locn)
        print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
    """
    self._toklist.insert(index, ins_string)
    # bump the recorded offset of every named result positioned after index
    for occurrences in self._tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            shifted = position + (position > index)
            occurrences[slot] = _ParseResultsWithOffset(value, shifted)
|
||||
|
||||
def append(self, item):
    """
    Append one element to the end of the ``ParseResults`` token list.

    Example::

        numlist = Word(nums)[...]
        print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to compute the sum of the parsed integers, and add it to the end
        def append_sum(tokens):
            tokens.append(sum(map(int, tokens)))
        numlist.add_parse_action(append_sum)
        print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
    """
    tokens = self._toklist
    tokens.append(item)
|
||||
|
||||
def extend(self, itemseq):
    """
    Append a sequence of elements to the end of the ``ParseResults`` token
    list. When ``itemseq`` is itself a ``ParseResults`` it is merged with
    ``+=``, which also carries over its results names.

    Example::

        patt = OneOrMore(Word(alphas))

        # use a parse action to append the reverse of the matched strings, to make a palindrome
        def make_palindrome(tokens):
            tokens.extend(reversed([t[::-1] for t in tokens]))
            return ''.join(tokens)
        patt.add_parse_action(make_palindrome)
        print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
    """
    if not isinstance(itemseq, ParseResults):
        self._toklist.extend(itemseq)
        return
    self.__iadd__(itemseq)
|
||||
|
||||
def clear(self):
    """
    Remove every token and every results name, in place.
    """
    self._toklist.clear()
    self._tokdict.clear()
|
||||
|
||||
def __getattr__(self, name):
    """
    Attribute-style access to named results (``results.day``).

    An undefined results name yields ``""`` instead of raising, except for
    names starting with ``"__"``, which raise ``AttributeError``.
    """
    try:
        value = self[name]
    except KeyError:
        if name.startswith("__"):
            raise AttributeError(name)
        value = ""
    return value
|
||||
|
||||
def __add__(self, other) -> "ParseResults":
    """Return a copy of this object with ``other`` merged in via ``+=``."""
    combined = self.copy()
    combined += other
    return combined
|
||||
|
||||
def __iadd__(self, other) -> "ParseResults":
    """
    Merge ``other`` into this object in place and return ``self``.

    Named results from ``other`` are re-registered here with their offsets
    moved past the current tokens (a negative offset becomes the current
    length); nested ``ParseResults`` values are re-parented to this object.
    Then the tokens of ``other`` are appended and its accumulating names
    are unioned in.
    """
    if other._tokdict:
        base = len(self._toklist)

        def shift(position):
            return base if position < 0 else position + base

        # build the full list first, before any entry is stored into self
        relocated = []
        for key, occurrences in other._tokdict.items():
            for entry in occurrences:
                relocated.append(
                    (key, _ParseResultsWithOffset(entry[0], shift(entry[1])))
                )

        for key, entry in relocated:
            self[key] = entry
            if isinstance(entry[0], ParseResults):
                entry[0]._parent = wkref(self)

    self._toklist += other._toklist
    self._all_names |= other._all_names
    return self
|
||||
|
||||
def __radd__(self, other) -> "ParseResults":
    """
    Reflected addition, so that ``sum()`` can merge many ``ParseResults``.

    :param other: the left-hand operand of ``other + self``
    :return: a copy of ``self`` when ``other`` is the integer ``0`` (the
        start value used by ``sum()``); ``other + self`` when ``other`` is
        an instance of this object's own type; otherwise ``NotImplemented``,
        so that Python raises ``TypeError`` for the unsupported operand.
    """
    if isinstance(other, int) and other == 0:
        # useful for merging many ParseResults using sum() builtin
        return self.copy()
    if isinstance(other, type(self)):
        # explicit call with a peer object: its own __add__ handles this
        return other + self
    # Do not evaluate ``other + self`` here for foreign types: Python would
    # dispatch straight back to this method and recurse until RecursionError.
    return NotImplemented
|
||||
|
||||
def __repr__(self) -> str:
    """Return ``ClassName(<token list repr>, <as_dict() result>)``."""
    cls_name = type(self).__name__
    named = self.as_dict()
    return "{}({!r}, {})".format(cls_name, self._toklist, named)
|
||||
|
||||
def __str__(self) -> str:
    """
    Render the tokens as a bracketed, comma-separated list. Nested
    ``ParseResults`` are rendered with ``str()``, all other tokens with
    ``repr()``.
    """
    rendered = []
    for tok in self._toklist:
        rendered.append(str(tok) if isinstance(tok, ParseResults) else repr(tok))
    return "[" + ", ".join(rendered) + "]"
|
||||
|
||||
def _asStringList(self, sep=""):
    """
    Flatten the tokens into a list of strings.

    Nested ``ParseResults`` are flattened recursively (the recursive call
    uses the default separator); other tokens are converted with ``str()``.
    A non-empty ``sep`` is inserted before every top-level item once the
    output is non-empty.
    """
    pieces = []
    for tok in self._toklist:
        if sep and pieces:
            pieces.append(sep)
        if isinstance(tok, ParseResults):
            pieces.extend(tok._asStringList())
        else:
            pieces.append(str(tok))
    return pieces
|
||||
|
||||
def as_list(self) -> list:
    """
    Returns the parse results as a nested list of matching tokens. Nested
    ``ParseResults`` are converted to lists recursively; every other token
    is included unchanged (no string conversion is applied).

    Example::

        patt = OneOrMore(Word(alphas))
        result = patt.parse_string("sldkj lsdkj sldkj")
        # even though the result prints in string-like form, it is actually a pyparsing ParseResults
        print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

        # Use as_list() to create an actual list
        result_list = result.as_list()
        print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
    """
    return [
        res.as_list() if isinstance(res, ParseResults) else res
        for res in self._toklist
    ]
|
||||
|
||||
def as_dict(self) -> dict:
    """
    Return the named parse results as a plain, nested ``dict``.

    A nested ``ParseResults`` value becomes a dict if it has results names
    of its own, otherwise a list of its (recursively converted) items.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parse_string('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.as_dict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """

    def convert(value):
        if not isinstance(value, ParseResults):
            return value
        if value.haskeys():
            return value.as_dict()
        return [convert(member) for member in value]

    return {name: convert(value) for name, value in self.items()}
|
||||
|
||||
def copy(self) -> "ParseResults":
    """
    Return a new :class:`ParseResults` built from this one's tokens, with a
    shallow copy of the results-name dict and the same parent, name and
    accumulating names.
    """
    clone = ParseResults(self._toklist)
    clone._tokdict = self._tokdict.copy()
    clone._parent = self._parent
    clone._all_names |= self._all_names
    clone._name = self._name
    return clone
|
||||
|
||||
def get_name(self):
    r"""
    Return the results name for this token expression, or ``None`` if one
    cannot be determined. Handy when several different expressions could
    match at a given location.

    The name is taken, in order, from: this object's own name; the key under
    which this object is stored in its parent; or the single results name of
    a one-token result whose recorded offset is 0 or -1.

    Example::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parse_string("22 111-22-3333 #221B")
        for item in result:
            print(item.get_name(), ':', item[0])

    prints::

        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self._name:
        return self._name

    if self._parent:
        # _parent is a weak reference; the call yields None once it is dead
        par = self._parent()
        if not par:
            return None
        for key, occurrences in par._tokdict.items():
            for value, _loc in occurrences:
                if value is self:
                    return key
        return None

    if len(self) == 1 and len(self._tokdict) == 1:
        only_entries = next(iter(self._tokdict.values()))
        if only_entries[0][1] in (0, -1):
            return next(iter(self._tokdict.keys()))
    return None
|
||||
|
||||
def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
    """
    Diagnostic method for listing out the contents of
    a :class:`ParseResults`. Accepts an optional ``indent`` argument so
    that this string can be embedded in a nested display of other data.

    :param indent: prefix placed in front of every emitted line
    :param full: if true, also list named results and nested ``ParseResults``
    :param include_list: if true, start with ``str(self.as_list())``
    :param _depth: nesting level, incremented on each recursive call
    :return: the assembled multi-line string

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parse_string('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    out = []
    NL = "\n"
    # the conditional covers the whole expression: an empty string is
    # appended when include_list is false
    out.append(indent + str(self.as_list()) if include_list else "")

    if full:
        if self.haskeys():
            # named results, ordered by the string form of their names
            items = sorted((str(k), v) for k, v in self.items())
            for k, v in items:
                if out:
                    out.append(NL)
                # NOTE(review): the per-level indent unit reads as one space
                # in this copy; confirm it was not collapsed from a wider unit
                out.append("{}{}- {}: ".format(indent, (" " * _depth), k))
                if isinstance(v, ParseResults):
                    if v:
                        out.append(
                            v.dump(
                                indent=indent,
                                full=full,
                                include_list=include_list,
                                _depth=_depth + 1,
                            )
                        )
                    else:
                        out.append(str(v))
                else:
                    out.append(repr(v))
        # the indexed listing is emitted only when at least one token is
        # itself a ParseResults; it then covers every token
        if any(isinstance(vv, ParseResults) for vv in self):
            v = self
            for i, vv in enumerate(v):
                if isinstance(vv, ParseResults):
                    out.append(
                        "\n{}{}[{}]:\n{}{}{}".format(
                            indent,
                            (" " * (_depth)),
                            i,
                            indent,
                            (" " * (_depth + 1)),
                            vv.dump(
                                indent=indent,
                                full=full,
                                include_list=include_list,
                                _depth=_depth + 1,
                            ),
                        )
                    )
                else:
                    out.append(
                        "\n%s%s[%d]:\n%s%s%s"
                        % (
                            indent,
                            (" " * (_depth)),
                            i,
                            indent,
                            (" " * (_depth + 1)),
                            str(vv),
                        )
                    )

    return "".join(out)
|
||||
|
||||
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results, converted with ``as_list()``, using the
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Any extra positional or keyword arguments are forwarded to
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimited_list(term)))
        result = func.parse_string("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_plain_list = self.as_list()
    pprint.pprint(as_plain_list, *args, **kwargs)
|
||||
|
||||
# add support for pickle protocol
|
||||
def __getstate__(self):
    """
    Pickle support: return ``(toklist, (tokdict copy, parent, all_names, name))``.

    The parent is stored as the dereferenced object, or ``None`` when there
    is no parent reference or it dereferences to a falsy value.
    """
    parent_ref = self._parent
    live_parent = parent_ref is not None and parent_ref() or None
    named_state = (
        self._tokdict.copy(),
        live_parent,
        self._all_names,
        self._name,
    )
    return (self._toklist, named_state)
|
||||
|
||||
def __setstate__(self, state):
    """
    Pickle support: restore from the tuple produced by ``__getstate__``.

    The accumulating names are rebuilt as a ``set`` and a non-``None``
    parent is wrapped in a weak reference again.
    """
    self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
    self._all_names = set(inAccumNames)
    self._parent = wkref(par) if par is not None else None
|
||||
|
||||
def __getnewargs__(self):
    """Pickle support: return the ``(toklist, name)`` arguments for ``__new__``."""
    new_args = (self._toklist, self._name)
    return new_args
|
||||
|
||||
def __dir__(self):
    """Return the class's attribute names followed by the defined results names."""
    class_attrs = dir(type(self))
    result_names = list(self.keys())
    return class_attrs + result_names
|
||||
|
||||
@classmethod
def from_dict(cls, other, name=None) -> "ParseResults":
    """
    Build a ``ParseResults`` from a ``dict``, keeping each key/value pair as
    a results name. Nested mappings are converted recursively. When the
    optional ``name`` is given, the built results are wrapped in an outer
    ``ParseResults`` under that name.
    """

    def is_iterable(obj):
        # strings and bytes are iterable but count as scalars here
        try:
            iter(obj)
        except Exception:
            return False
        return not isinstance(obj, str_type)

    built = cls([])
    for key, value in other.items():
        if isinstance(value, Mapping):
            piece = cls.from_dict(value, name=key)
        else:
            piece = cls([value], name=key, asList=is_iterable(value))
        built += piece

    return built if name is None else cls([built], name=name)
|
||||
|
||||
# camelCase aliases bound to the same functions as the snake_case methods
asList = as_list
asDict = as_dict
getName = get_name
|
||||
|
||||
|
||||
# Register ParseResults as a virtual subclass of both ABCs, so that
# isinstance()/issubclass() checks against them succeed without inheritance.
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)
|
|
@ -0,0 +1,331 @@
|
|||
# testing.py
|
||||
|
||||
from contextlib import contextmanager
|
||||
from typing import Optional
|
||||
|
||||
from .core import (
|
||||
ParserElement,
|
||||
ParseException,
|
||||
Keyword,
|
||||
__diag__,
|
||||
__compat__,
|
||||
)
|
||||
|
||||
|
||||
class pyparsing_test:
|
||||
"""
|
||||
namespace class for classes useful in writing unit tests
|
||||
"""
|
||||
|
||||
class reset_pyparsing_context:
    """
    Context manager to be used when writing unit tests that modify pyparsing config values:
    - packrat parsing
    - bounded recursion parsing
    - default whitespace characters.
    - default keyword characters
    - literal string auto-conversion class
    - __diag__ settings
    - __compat__ settings

    Example::

        with reset_pyparsing_context():
            # test that literals used to construct a grammar are automatically suppressed
            ParserElement.inlineLiteralsUsing(Suppress)

            term = Word(alphas) | Word(nums)
            group = Group('(' + term[...] + ')')

            # assert that the '()' characters are not included in the parsed tokens
            self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

        # after exiting context manager, literals are converted to Literal expressions again
    """

    def __init__(self):
        # snapshot of the global settings, filled in by save()
        self._save_context = {}

    def save(self):
        """Snapshot the current global pyparsing settings; returns ``self``."""
        self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
        self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS

        self._save_context[
            "literal_string_class"
        ] = ParserElement._literalStringClass

        self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace

        self._save_context["packrat_enabled"] = ParserElement._packratEnabled
        if ParserElement._packratEnabled:
            self._save_context[
                "packrat_cache_size"
            ] = ParserElement.packrat_cache.size
        else:
            self._save_context["packrat_cache_size"] = None
        self._save_context["packrat_parse"] = ParserElement._parse
        self._save_context[
            "recursion_enabled"
        ] = ParserElement._left_recursion_enabled

        self._save_context["__diag__"] = {
            name: getattr(__diag__, name) for name in __diag__._all_names
        }

        self._save_context["__compat__"] = {
            "collect_all_And_tokens": __compat__.collect_all_And_tokens
        }

        return self

    def restore(self):
        """Put back the settings captured by ``save()``; returns ``self``."""
        # reset pyparsing global state
        if (
            ParserElement.DEFAULT_WHITE_CHARS
            != self._save_context["default_whitespace"]
        ):
            ParserElement.set_default_whitespace_chars(
                self._save_context["default_whitespace"]
            )

        ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"]

        Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
        ParserElement.inlineLiteralsUsing(
            self._save_context["literal_string_class"]
        )

        for name, value in self._save_context["__diag__"].items():
            (__diag__.enable if value else __diag__.disable)(name)

        ParserElement._packratEnabled = False
        if self._save_context["packrat_enabled"]:
            ParserElement.enable_packrat(self._save_context["packrat_cache_size"])
        else:
            ParserElement._parse = self._save_context["packrat_parse"]
        ParserElement._left_recursion_enabled = self._save_context[
            "recursion_enabled"
        ]

        # save() stores a {flag name: value} dict; restore each flag by name
        # rather than assigning the dict itself to a single flag
        for name, value in self._save_context["__compat__"].items():
            setattr(__compat__, name, value)

        return self

    def copy(self):
        """Return a new instance holding a shallow copy of the saved settings."""
        ret = type(self)()
        ret._save_context.update(self._save_context)
        return ret

    def __enter__(self):
        return self.save()

    def __exit__(self, *args):
        self.restore()
|
||||
|
||||
class TestParseResultsAsserts:
|
||||
"""
|
||||
A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
|
||||
"""
|
||||
|
||||
def assertParseResultsEquals(
    self, result, expected_list=None, expected_dict=None, msg=None
):
    """
    Unit test assertion that checks a :class:`ParseResults` against an
    optional ``expected_list`` (compared with ``result.as_list()``) and an
    optional ``expected_dict`` (compared with ``result.as_dict()``).
    An expectation left as ``None`` is not checked.
    """
    checks = ((expected_list, "as_list"), (expected_dict, "as_dict"))
    for expected, converter in checks:
        if expected is not None:
            self.assertEqual(expected, getattr(result, converter)(), msg=msg)
|
||||
|
||||
def assertParseAndCheckList(
    self, expr, test_string, expected_list, msg=None, verbose=True
):
    """
    Convenience assertion: parse ``test_string`` in full with ``expr``,
    print the result (``dump()`` when ``verbose``, else ``as_list()``), and
    assert that ``ParseResults.asList()`` equals ``expected_list``.
    """
    result = expr.parse_string(test_string, parse_all=True)
    shown = result.dump() if verbose else result.as_list()
    print(shown)
    self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)
|
||||
|
||||
def assertParseAndCheckDict(
    self, expr, test_string, expected_dict, msg=None, verbose=True
):
    """
    Convenience wrapper assert to test a parser element and input string, and assert that
    the resulting ``ParseResults.asDict()`` is equal to the ``expected_dict``.

    :param expr: parser element used to parse ``test_string``
    :param test_string: input text, which must be consumed completely
    :param expected_dict: expected value of ``result.as_dict()``
    :param msg: optional failure message passed to the assertion
    :param verbose: print ``result.dump()`` if true, else ``result.as_list()``
    """
    # use the same snake_case keyword as assertParseAndCheckList
    result = expr.parse_string(test_string, parse_all=True)
    if verbose:
        print(result.dump())
    else:
        print(result.as_list())
    self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)
|
||||
|
||||
def assertRunTestResults(
    self, run_tests_report, expected_parse_results=None, msg=None
):
    """
    Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of
    list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped
    with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``.
    Finally, asserts that the overall ``runTests()`` success value is ``True``.

    :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
    :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
    """
    run_test_success, run_test_results = run_tests_report

    if expected_parse_results is not None:
        # pair each (test_string, result) report with its expectation;
        # zip() stops at the shorter of the two sequences
        merged = [
            (*rpt, expected)
            for rpt, expected in zip(run_test_results, expected_parse_results)
        ]
        for test_string, result, expected in merged:
            # expected should be a tuple containing a list and/or a dict or an exception,
            # and optional failure message string
            # an empty tuple will skip any result validation
            fail_msg = next(
                (exp for exp in expected if isinstance(exp, str)), None
            )
            # first Exception subclass (a class, not an instance), if any
            expected_exception = next(
                (
                    exp
                    for exp in expected
                    if isinstance(exp, type) and issubclass(exp, Exception)
                ),
                None,
            )
            if expected_exception is not None:
                # re-raise the recorded exception inside assertRaises so its
                # type is checked; a non-exception result raises nothing here
                with self.assertRaises(
                    expected_exception=expected_exception, msg=fail_msg or msg
                ):
                    if isinstance(result, Exception):
                        raise result
            else:
                expected_list = next(
                    (exp for exp in expected if isinstance(exp, list)), None
                )
                expected_dict = next(
                    (exp for exp in expected if isinstance(exp, dict)), None
                )
                if (expected_list, expected_dict) != (None, None):
                    self.assertParseResultsEquals(
                        result,
                        expected_list=expected_list,
                        expected_dict=expected_dict,
                        msg=fail_msg or msg,
                    )
                else:
                    # warning here maybe?
                    print("no validation for {!r}".format(test_string))

    # do this last, in case some specific test results can be reported instead
    self.assertTrue(
        run_test_success, msg=msg if msg is not None else "failed runTests"
    )
|
||||
|
||||
@contextmanager
def assertRaisesParseException(self, exc_type=ParseException, msg=None):
    """
    Context manager asserting that the enclosed block raises ``exc_type``
    (``ParseException`` by default), by delegating to ``self.assertRaises``.
    """
    raises_ctx = self.assertRaises(exc_type, msg=msg)
    with raises_ctx:
        yield
|
||||
|
||||
@staticmethod
def with_line_numbers(
    s: str,
    start_line: Optional[int] = None,
    end_line: Optional[int] = None,
    expand_tabs: bool = True,
    eol_mark: str = "|",
    mark_spaces: Optional[str] = None,
    mark_control: Optional[str] = None,
) -> str:
    """
    Helpful method for debugging a parser - returns a string annotated with
    line and column numbers. (Line and column numbers are 1-based.)

    :param s: str - string to be annotated with line and column numbers
    :param start_line: int - (optional) starting line number in s to include (default=1)
    :param end_line: int - (optional) ending line number in s to include (default=len(s))
    :param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default
    :param eol_mark: str - (optional) string to mark the end of lines, helps visualize
                     trailing spaces (default="|"); forced to "" when
                     ``mark_control="unicode"``
    :param mark_spaces: str - (optional) special character to display in place of spaces;
                        "unicode" replaces tabs and spaces with Unicode symbols
    :param mark_control: str - (optional) convert non-printing control characters to a
                         placeholding character; valid values:
                         - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊"
                         - any single character string - replace control characters with given string
                         - None (default) - string is displayed as-is

    :return: str - input string with leading line numbers and column number headers
    """
    if expand_tabs:
        s = s.expandtabs()
    if mark_control is not None:
        if mark_control == "unicode":
            # Code points 0..32 map onto the Unicode "Control Pictures" block,
            # and DEL (127) onto U+2421.  The table is built with plain dict
            # operations: the ``dict | dict`` operator only exists on 3.9+.
            control_map = {c: 0x2400 + c for c in range(33)}
            control_map[127] = 0x2421
            tbl = str.maketrans(control_map)
            # newlines are now visible as a picture, so no extra EOL marker
            eol_mark = ""
        else:
            tbl = str.maketrans(
                {c: mark_control for c in list(range(0, 32)) + [127]}
            )
        s = s.translate(tbl)
    if mark_spaces is not None and mark_spaces != " ":
        if mark_spaces == "unicode":
            tbl = str.maketrans({9: 0x2409, 32: 0x2423})
            s = s.translate(tbl)
        else:
            s = s.replace(" ", mark_spaces)
    if start_line is None:
        start_line = 1
    if end_line is None:
        end_line = len(s)
    end_line = min(end_line, len(s))
    start_line = min(max(1, start_line), end_line)

    if mark_control != "unicode":
        s_lines = s.splitlines()[start_line - 1 : end_line]
    else:
        # newlines were translated to "␊" above, so split on the picture
        # character and put it back at the end of every displayed line
        s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]]
    if not s_lines:
        return ""

    lineno_width = len(str(end_line))
    max_line_len = max(len(line) for line in s_lines)
    lead = " " * (lineno_width + 1)
    if max_line_len >= 99:
        # hundreds row: one marker every 100 columns
        header0 = (
            lead
            + "".join(
                "{}{}".format(" " * 99, (i + 1) % 100)
                for i in range(max(max_line_len // 100, 1))
            )
            + "\n"
        )
    else:
        header0 = ""
    # tens row: nine spaces then the tens digit, so each digit sits directly
    # above the "0" that closes the matching "1234567890" group in header2
    header1 = (
        header0
        + lead
        + "".join(
            "{}{}".format(" " * 9, (i + 1) % 10)
            for i in range(-(-max_line_len // 10))
        )
        + "\n"
    )
    # units row; -(-n // 10) is ceil(n / 10)
    header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
    return (
        header1
        + header2
        + "\n".join(
            "{:{}d}:{}{}".format(i, lineno_width, line, eol_mark)
            for i, line in enumerate(s_lines, start=start_line)
        )
        + "\n"
    )
|
|
@ -0,0 +1,332 @@
|
|||
# unicode.py
|
||||
|
||||
import sys
|
||||
from itertools import filterfalse
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
|
||||
class _lazyclassproperty:
    """
    Descriptor that evaluates ``fn(cls)`` once per class and caches the result.

    Results are kept in a per-class ``_intern`` dict keyed by the wrapped
    function's name; a subclass never shares the cache dict of one of its
    base classes.
    """

    def __init__(self, fn):
        self.fn = fn
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    def __get__(self, obj, cls):
        owner = type(obj) if cls is None else cls

        # The owner needs a fresh cache when it has none at all, or when the
        # ``_intern`` it sees is really a dict inherited from a base class.
        needs_own_cache = not hasattr(owner, "_intern")
        if not needs_own_cache:
            needs_own_cache = any(
                owner._intern is getattr(base, "_intern", [])
                for base in owner.__mro__[1:]
            )
        if needs_own_cache:
            owner._intern = {}

        key = self.fn.__name__
        if key not in owner._intern:
            owner._intern[key] = self.fn(owner)
        return owner._intern[key]
|
||||
|
||||
|
||||
# Type alias for a ``_ranges`` table: a list of (first, last) code point
# 2-tuples and/or (code_point,) 1-tuples.
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
|
||||
|
||||
|
||||
class unicode_set:
    """
    Base class describing a set of Unicode characters, exposing
    language-specific strings for ``alphas``, ``nums``, ``alphanums``,
    and ``printables``.

    A subclass lists its code points in the class attribute ``_ranges``,
    using 2-tuples for ranges and 1-tuples for single code points::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
        ]

    Both ends of a range are included; ``(x,)`` is shorthand for ``(x, x)``.

    Sets can be combined through multiple inheritance::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        # Gather the ranges of every class in the MRO up to (but excluding)
        # unicode_set itself, so multiply-inherited sets are merged.
        code_points = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in getattr(klass, "_ranges", ()):
                code_points.update(range(bounds[0], bounds[-1] + 1))
        return list(map(chr, sorted(code_points)))

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        return "".join(ch for ch in cls._chars_for_ranges if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        return "".join(ch for ch in cls._chars_for_ranges if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        return "".join(ch for ch in cls._chars_for_ranges if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        return "".join((cls.alphas, cls.nums))

    @_lazyclassproperty
    def identchars(cls):
        "all characters in this range that are valid identifier characters, plus underscore '_'"
        from_ranges = {ch for ch in cls._chars_for_ranges if ch.isidentifier()}
        always_included = (
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
            "_"
        )
        return "".join(sorted(from_ranges.union(always_included)))

    @_lazyclassproperty
    def identbodychars(cls):
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9
        """
        # a character is a valid identifier *body* character when it is
        # accepted after a leading underscore
        body_only = {
            ch for ch in cls._chars_for_ranges if ("_" + ch).isidentifier()
        }
        combined = set(cls.identchars)
        combined.update("0123456789")
        combined |= body_only
        return "".join(sorted(combined))
|
||||
|
||||
|
||||
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.

    Each nested class is a ``unicode_set`` whose ``_ranges`` table lists the
    code points belonging to that script or language.
    """

    # the namespace itself spans everything from the space character (32)
    # up to sys.maxunicode
    _ranges: UnicodeRangeList = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0020, 0x007E),
            (0x00A0, 0x00FF),
        ]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0100, 0x017F),
        ]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0180, 0x024F),
        ]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        _ranges: UnicodeRangeList = [
            (0x0342, 0x0345),
            (0x0370, 0x0377),
            (0x037A, 0x037F),
            (0x0384, 0x038A),
            (0x038C,),
            (0x038E, 0x03A1),
            (0x03A3, 0x03E1),
            (0x03F0, 0x03FF),
            (0x1D26, 0x1D2A),
            (0x1D5E,),
            (0x1D60,),
            (0x1D66, 0x1D6A),
            (0x1F00, 0x1F15),
            (0x1F18, 0x1F1D),
            (0x1F20, 0x1F45),
            (0x1F48, 0x1F4D),
            (0x1F50, 0x1F57),
            (0x1F59,),
            (0x1F5B,),
            (0x1F5D,),
            (0x1F5F, 0x1F7D),
            (0x1F80, 0x1FB4),
            (0x1FB6, 0x1FC4),
            (0x1FC6, 0x1FD3),
            (0x1FD6, 0x1FDB),
            (0x1FDD, 0x1FEF),
            (0x1FF2, 0x1FF4),
            (0x1FF6, 0x1FFE),
            (0x2129,),
            (0x2719, 0x271A),
            (0xAB65,),
            (0x10140, 0x1018D),
            (0x101A0,),
            (0x1D200, 0x1D245),
            (0x1F7A1, 0x1F7A7),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0400, 0x052F),
            (0x1C80, 0x1C88),
            (0x1D2B,),
            (0x1D78,),
            (0x2DE0, 0x2DFF),
            (0xA640, 0xA672),
            (0xA674, 0xA69F),
            (0xFE2E, 0xFE2F),
        ]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x2E80, 0x2E99),
            (0x2E9B, 0x2EF3),
            (0x31C0, 0x31E3),
            (0x3400, 0x4DB5),
            (0x4E00, 0x9FEF),
            (0xA700, 0xA707),
            (0xF900, 0xFA6D),
            (0xFA70, 0xFAD9),
            (0x16FE2, 0x16FE3),
            (0x1F210, 0x1F212),
            (0x1F214, 0x1F23B),
            (0x1F240, 0x1F248),
            (0x20000, 0x2A6D6),
            (0x2A700, 0x2B734),
            (0x2B740, 0x2B81D),
            (0x2B820, 0x2CEA1),
            (0x2CEB0, 0x2EBE0),
            (0x2F800, 0x2FA1D),
        ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # placeholder: rebound after the class body to the concatenation of
        # the Kanji, Hiragana and Katakana tables defined below
        _ranges: UnicodeRangeList = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x4E00, 0x9FBF),
                (0x3000, 0x303F),
            ]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x3041, 0x3096),
                (0x3099, 0x30A0),
                (0x30FC,),
                (0xFF70,),
                (0x1B001,),
                (0x1B150, 0x1B152),
                (0x1F200,),
            ]

        class Katakana(unicode_set):
            "Unicode set for Katakana Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x3099, 0x309C),
                (0x30A0, 0x30FF),
                (0x31F0, 0x31FF),
                (0x32D0, 0x32FE),
                (0xFF65, 0xFF9F),
                (0x1B000,),
                (0x1B164, 0x1B167),
                (0x1F201, 0x1F202),
                (0x1F213,),
            ]

    class Hangul(unicode_set):
        "Unicode set for Hangul (Korean) Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x1100, 0x11FF),
            (0x302E, 0x302F),
            (0x3131, 0x318E),
            (0x3200, 0x321C),
            (0x3260, 0x327B),
            (0x327E,),
            (0xA960, 0xA97C),
            (0xAC00, 0xD7A3),
            (0xD7B0, 0xD7C6),
            (0xD7CB, 0xD7FB),
            (0xFFA0, 0xFFBE),
            (0xFFC2, 0xFFC7),
            (0xFFCA, 0xFFCF),
            (0xFFD2, 0xFFD7),
            (0xFFDA, 0xFFDC),
        ]

    # alias: Korean is the very same class object as Hangul
    Korean = Hangul

    class CJK(Chinese, Japanese, Hangul):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        # no table of its own; the ranges come from the three base classes
        pass

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0600, 0x061B),
            (0x061E, 0x06FF),
            (0x0700, 0x077F),
        ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0591, 0x05C7),
            (0x05D0, 0x05EA),
            (0x05EF, 0x05F4),
            (0xFB1D, 0xFB36),
            (0xFB38, 0xFB3C),
            (0xFB3E,),
            (0xFB40, 0xFB41),
            (0xFB43, 0xFB44),
            (0xFB46, 0xFB4F),
        ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]
|
||||
|
||||
|
||||
# Japanese has no table of its own: its ranges are the concatenation of the
# Kanji, Hiragana and Katakana tables nested inside it.
pyparsing_unicode.Japanese._ranges = (
    pyparsing_unicode.Japanese.Kanji._ranges
    + pyparsing_unicode.Japanese.Hiragana._ranges
    + pyparsing_unicode.Japanese.Katakana._ranges
)

# define ranges in language character sets
# (each native-language attribute is an alias bound to the same class object
# as its English-named counterpart)
pyparsing_unicode.العربية = pyparsing_unicode.Arabic
pyparsing_unicode.中文 = pyparsing_unicode.Chinese
pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic
pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek
pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew
pyparsing_unicode.日本語 = pyparsing_unicode.Japanese
pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji
pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana
pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana
pyparsing_unicode.한국어 = pyparsing_unicode.Korean
pyparsing_unicode.ไทย = pyparsing_unicode.Thai
pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari
|
|
@ -0,0 +1,235 @@
|
|||
# util.py
|
||||
import warnings
|
||||
import types
|
||||
import collections
|
||||
import itertools
|
||||
from functools import lru_cache
|
||||
from typing import List, Union, Iterable
|
||||
|
||||
# the backslash character (code point 92), used when building escaped strings
_bslash = chr(92)
|
||||
|
||||
|
||||
class __config_flags:
    """
    Internal base class holding named boolean compatibility/debugging flags.

    Subclasses list the settable flag names in ``_all_names``; names that are
    also in ``_fixed_names`` cannot be changed, and attempts to do so only
    emit a warning.
    """

    _all_names: List[str] = []
    _fixed_names: List[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        """Set flag ``dname`` to ``value``; warn if fixed, raise ValueError if unknown."""
        if dname in cls._fixed_names:
            current = str(getattr(cls, dname)).upper()
            message = "{}.{} {} is {} and cannot be overridden".format(
                cls.__name__, dname, cls._type_desc, current
            )
            warnings.warn(message)
            return

        if dname not in cls._all_names:
            raise ValueError("no such {} {!r}".format(cls._type_desc, dname))
        setattr(cls, dname, value)

    @classmethod
    def enable(cls, name):
        """Turn the named flag on."""
        return cls._set(name, True)

    @classmethod
    def disable(cls, name):
        """Turn the named flag off."""
        return cls._set(name, False)
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Return the 1-based column of position ``loc`` within ``strg``, treating
    newlines as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a position immediately after a newline is the first column of a line
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # otherwise measure the distance back to the previous newline
    # (rfind gives -1 when there is none, which yields loc + 1)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """
    Return the 1-based line number of position ``loc`` within ``strg``,
    treating newlines as line separators.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    # every newline before loc pushes the position one line further down
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Return the text of the line containing position ``loc`` in ``strg``
    (without its newline), treating newlines as line separators.
    """
    # the line starts just after the last newline before loc
    # (index 0 when there is none, since rfind gives -1)
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:stop]
|
||||
|
||||
|
||||
class _UnboundedCache:
    """
    Cache with no size limit.

    The backing dict lives in a closure rather than on the instance; ``get``,
    ``set`` and ``clear`` are bound to the instance as methods. ``get``
    returns the ``not_in_cache`` sentinel for unknown keys, and ``size`` is
    ``None``.
    """

    def __init__(self):
        store = {}
        lookup = store.get
        sentinel = object()

        def get(_, key):
            return lookup(key, sentinel)

        def set_(_, key, value):
            store[key] = value

        def clear(_):
            store.clear()

        self.not_in_cache = sentinel
        self.size = None
        for attr, func in (("get", get), ("set", set_), ("clear", clear)):
            setattr(self, attr, types.MethodType(func, self))
|
||||
|
||||
|
||||
class _FifoCache:
    """
    Cache holding at most ``size`` entries, evicting in insertion order.

    The backing ordered dict lives in a closure rather than on the instance;
    ``get``, ``set`` and ``clear`` are bound to the instance as methods.
    ``get`` returns the ``not_in_cache`` sentinel for unknown keys.
    """

    def __init__(self, size):
        store = collections.OrderedDict()
        lookup = store.get
        sentinel = object()

        def get(_, key):
            return lookup(key, sentinel)

        def set_(_, key, value):
            store[key] = value
            # drop the oldest entries until the cache fits again
            while len(store) > size:
                store.popitem(last=False)

        def clear(_):
            store.clear()

        self.not_in_cache = sentinel
        self.size = size
        for attr, func in (("get", get), ("set", set_), ("clear", clear)):
            setattr(self, attr, types.MethodType(func, self))
|
||||
|
||||
|
||||
class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    Items that have been set are "active" and always retrievable. Deleting an
    item moves it into a bounded memory of retained items instead of
    discarding it. The memo tracks retained items by their access order; once
    `capacity` items are retained, the least recently used item is discarded.

    A `capacity` of zero (or less) retains nothing: deleted items are simply
    dropped.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        # items currently set and not yet deleted
        self._active = {}
        # deleted-but-retained items, least recently used first
        self._memory = collections.OrderedDict()

    def __getitem__(self, key):
        """
        Return the value for `key`, whether active or retained.

        Reading a retained item marks it as most recently used.

        :raises KeyError: if `key` is neither active nor retained
        """
        try:
            return self._active[key]
        except KeyError:
            self._memory.move_to_end(key)
            return self._memory[key]

    def __setitem__(self, key, value):
        """Make `key` active with `value`, dropping any retained copy."""
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        """
        Move an active item into the retained memory.

        Deleting a key that is not active is a silent no-op.
        """
        try:
            value = self._active.pop(key)
        except KeyError:
            return
        if self._capacity <= 0:
            # Nothing may be retained.  Without this guard the eviction loop
            # below would call popitem() on an empty memory and raise KeyError.
            return
        # make room first, so the memory never exceeds `capacity` items
        while len(self._memory) >= self._capacity:
            self._memory.popitem(last=False)
        self._memory[key] = value

    def clear(self):
        """Forget every active and retained item."""
        self._active.clear()
        self._memory.clear()
|
||||
|
||||
|
||||
class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        """Ignore the deletion, so the item stays retrievable."""
|
||||
|
||||
|
||||
def _escape_regex_range_chars(s: str) -> str:
    """
    Escape the characters that are special inside a regex character class.

    Backslash, ``^``, ``-``, ``[`` and ``]`` get a leading backslash; newline
    and tab characters are replaced by the two-character sequences ``\\n``
    and ``\\t``.
    """
    # a single translation pass; each source character maps to its escape
    escapes = {ch: "\\" + ch for ch in r"\^-[]"}
    escapes["\n"] = r"\n"
    escapes["\t"] = r"\t"
    return str(s.translate(str.maketrans(escapes)))
|
||||
|
||||
|
||||
def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    """
    Collapse a collection of characters into compact regex character-class
    notation (e.g. ``"abcdxyz"`` becomes ``"a-dx-z"``).

    The input is de-duplicated and sorted first. With three characters or
    fewer, the characters are emitted individually. When ``re_escape`` is
    true, the characters ``\\ ^ - ] [`` are backslash-escaped in the output.
    """
    if re_escape:

        def render(ch):
            return "\\" + ch if ch in r"\^-][" else ch

    else:

        def render(ch):
            return ch

    ordered = "".join(sorted(set(s)))
    if len(ordered) <= 3:
        return "".join([render(ch) for ch in ordered])

    # Split into runs: a new run starts at the first character and whenever
    # the code point jumps by more than one from the previous character.
    runs = []
    previous_code = 0
    for ch in ordered:
        code = ord(ch)
        if not runs or code - previous_code > 1:
            runs.append([ch, ch])
        else:
            runs[-1][1] = ch
        previous_code = code

    pieces = []
    for first, last in runs:
        if first == last:
            pieces.append(render(first))
            continue
        # two adjacent characters are written side by side; longer runs
        # are written as first-last
        joiner = "" if ord(last) == ord(first) + 1 else "-"
        pieces.append(render(first) + joiner + render(last))
    return "".join(pieces)
|
||||
|
||||
|
||||
def _flatten(ll: list) -> list:
    """
    Return a new flat list holding the items of ``ll``, with nested lists
    expanded in place (depth-first, original order).

    Only ``list`` instances are expanded; other containers such as tuples
    are kept as single items.
    """
    flat = []
    # stack of iterators: the innermost list being walked is on top
    pending = [iter(ll)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # descend into the nested list, resuming the parent later
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # current iterator is exhausted
            pending.pop()
    return flat
|
|
@ -8,7 +8,7 @@ packaging==21.3
|
|||
pep517==0.12.0
|
||||
platformdirs==2.4.1
|
||||
progress==1.6
|
||||
pyparsing==2.4.7
|
||||
pyparsing==3.0.7
|
||||
requests==2.27.1
|
||||
certifi==2021.05.30
|
||||
chardet==4.0.0
|
||||
|
|
Loading…
Reference in New Issue