upgrade pyparsing to improve startup time

This commit is contained in:
Anthony Sottile 2022-04-17 08:19:24 -07:00
parent 8133d83929
commit 994bd91c3e
9 changed files with 159 additions and 101 deletions

View File

@ -0,0 +1 @@
Upgrade ``pyparsing`` to 3.0.8 for startup performance improvements.

View File

@ -1 +0,0 @@
from pyparsing import *

View File

@ -1,6 +1,6 @@
# module pyparsing.py
#
# Copyright (c) 2003-2021 Paul T. McGuire
# Copyright (c) 2003-2022 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
@ -105,14 +105,17 @@ class version_info(NamedTuple):
@property
def __version__(self):
return "{}.{}.{}".format(self.major, self.minor, self.micro) + (
"{}{}{}".format(
"r" if self.releaselevel[0] == "c" else "",
self.releaselevel[0],
self.serial,
),
"",
)[self.releaselevel == "final"]
return (
"{}.{}.{}".format(self.major, self.minor, self.micro)
+ (
"{}{}{}".format(
"r" if self.releaselevel[0] == "c" else "",
self.releaselevel[0],
self.serial,
),
"",
)[self.releaselevel == "final"]
)
def __str__(self):
return "{} {} / {}".format(__name__, self.__version__, __version_time__)
@ -125,8 +128,8 @@ class version_info(NamedTuple):
)
__version_info__ = version_info(3, 0, 7, "final", 0)
__version_time__ = "15 Jan 2022 04:10 UTC"
__version_info__ = version_info(3, 0, 8, "final", 0)
__version_time__ = "09 Apr 2022 23:29 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"

View File

@ -23,7 +23,6 @@ import string
import copy
import warnings
import re
import sre_constants
import sys
from collections.abc import Iterable
import traceback
@ -53,7 +52,7 @@ _MAX_INT = sys.maxsize
str_type: Tuple[type, ...] = (str, bytes)
#
# Copyright (c) 2003-2021 Paul T. McGuire
# Copyright (c) 2003-2022 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
@ -76,6 +75,19 @@ str_type: Tuple[type, ...] = (str, bytes)
#
if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """Minimal stand-in for ``functools.cached_property`` on Python < 3.8.

        Wraps a zero-argument method so that its result is computed on first
        attribute access and stored in the instance ``__dict__`` under the
        method's name. Because this class defines no ``__set__``, the stored
        instance attribute takes precedence on later lookups and the wrapped
        function is not called again for that instance.
        """

        def __init__(self, func):
            self._func = func
            self.__doc__ = func.__doc__

        def __get__(self, instance, owner=None):
            # Class-level access (``Owner.attr``) has no instance to cache on;
            # return the descriptor itself, as functools.cached_property does,
            # instead of failing on ``None.__dict__``.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret
class __compat__(__config_flags):
"""
A cross-version compatibility configuration for pyparsing features that will be
@ -246,10 +258,10 @@ hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
printables = "".join([c for c in string.printable if c not in string.whitespace])
_trim_arity_call_line = None
_trim_arity_call_line: traceback.StackSummary = None
def _trim_arity(func, maxargs=2):
def _trim_arity(func, max_limit=3):
"""decorator to trim function calls to match the arity of the target"""
global _trim_arity_call_line
@ -267,16 +279,12 @@ def _trim_arity(func, maxargs=2):
# synthesize what would be returned by traceback.extract_stack at the call to
# user's parse action 'func', so that we don't incur call penalty at parse time
LINE_DIFF = 11
# fmt: off
LINE_DIFF = 7
# IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
# THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
_trim_arity_call_line = (
_trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
)
pa_call_line_synth = (
_trim_arity_call_line[0],
_trim_arity_call_line[1] + LINE_DIFF,
)
_trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)
def wrapper(*args):
nonlocal found_arity, limit
@ -297,16 +305,18 @@ def _trim_arity(func, maxargs=2):
del tb
if trim_arity_type_error:
if limit <= maxargs:
if limit < max_limit:
limit += 1
continue
raise
# fmt: on
# copy func name to wrapper for sensible debug output
# (can't use functools.wraps, since that messes with function signature)
func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
wrapper.__name__ = func_name
wrapper.__doc__ = func.__doc__
return wrapper
@ -467,7 +477,6 @@ class ParserElement(ABC):
self.modalResults = True
# custom debug actions
self.debugActions = self.DebugActions(None, None, None)
self.re = None
# avoid redundant calls to preParse
self.callPreparse = True
self.callDuringTry = False
@ -1342,7 +1351,7 @@ class ParserElement(ABC):
last = e
yield instring[last:]
def __add__(self, other):
def __add__(self, other) -> "ParserElement":
"""
Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
converts them to :class:`Literal`s by default.
@ -1382,7 +1391,7 @@ class ParserElement(ABC):
)
return And([self, other])
def __radd__(self, other):
def __radd__(self, other) -> "ParserElement":
"""
Implementation of ``+`` operator when left operand is not a :class:`ParserElement`
"""
@ -1399,7 +1408,7 @@ class ParserElement(ABC):
)
return other + self
def __sub__(self, other):
def __sub__(self, other) -> "ParserElement":
"""
Implementation of ``-`` operator, returns :class:`And` with error stop
"""
@ -1413,7 +1422,7 @@ class ParserElement(ABC):
)
return self + And._ErrorStop() + other
def __rsub__(self, other):
def __rsub__(self, other) -> "ParserElement":
"""
Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
"""
@ -1427,7 +1436,7 @@ class ParserElement(ABC):
)
return other - self
def __mul__(self, other):
def __mul__(self, other) -> "ParserElement":
"""
Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
@ -1513,10 +1522,10 @@ class ParserElement(ABC):
ret = And([self] * minElements)
return ret
def __rmul__(self, other):
def __rmul__(self, other) -> "ParserElement":
return self.__mul__(other)
def __or__(self, other):
def __or__(self, other) -> "ParserElement":
"""
Implementation of ``|`` operator - returns :class:`MatchFirst`
"""
@ -1533,7 +1542,7 @@ class ParserElement(ABC):
)
return MatchFirst([self, other])
def __ror__(self, other):
def __ror__(self, other) -> "ParserElement":
"""
Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
"""
@ -1547,7 +1556,7 @@ class ParserElement(ABC):
)
return other | self
def __xor__(self, other):
def __xor__(self, other) -> "ParserElement":
"""
Implementation of ``^`` operator - returns :class:`Or`
"""
@ -1561,7 +1570,7 @@ class ParserElement(ABC):
)
return Or([self, other])
def __rxor__(self, other):
def __rxor__(self, other) -> "ParserElement":
"""
Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
"""
@ -1575,7 +1584,7 @@ class ParserElement(ABC):
)
return other ^ self
def __and__(self, other):
def __and__(self, other) -> "ParserElement":
"""
Implementation of ``&`` operator - returns :class:`Each`
"""
@ -1589,7 +1598,7 @@ class ParserElement(ABC):
)
return Each([self, other])
def __rand__(self, other):
def __rand__(self, other) -> "ParserElement":
"""
Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
"""
@ -1603,7 +1612,7 @@ class ParserElement(ABC):
)
return other & self
def __invert__(self):
def __invert__(self) -> "ParserElement":
"""
Implementation of ``~`` operator - returns :class:`NotAny`
"""
@ -1653,7 +1662,7 @@ class ParserElement(ABC):
ret = self * tuple(key[:2])
return ret
def __call__(self, name: str = None):
def __call__(self, name: str = None) -> "ParserElement":
"""
Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.
@ -2140,6 +2149,7 @@ class ParserElement(ABC):
output_html: Union[TextIO, Path, str],
vertical: int = 3,
show_results_names: bool = False,
show_groups: bool = False,
**kwargs,
) -> None:
"""
@ -2152,7 +2162,7 @@ class ParserElement(ABC):
instead of horizontally (default=3)
- show_results_names - bool flag whether diagram should show annotations for
defined results names
- show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box
Additional diagram-formatting keyword arguments can also be included;
see railroad.Diagram class.
"""
@ -2170,6 +2180,7 @@ class ParserElement(ABC):
self,
vertical=vertical,
show_results_names=show_results_names,
show_groups=show_groups,
diagram_kwargs=kwargs,
)
if isinstance(output_html, (str, Path)):
@ -2219,7 +2230,7 @@ class _PendingSkip(ParserElement):
def _generateDefaultName(self):
return str(self.anchor + Empty()).replace("Empty", "...")
def __add__(self, other):
def __add__(self, other) -> "ParserElement":
skipper = SkipTo(other).set_name("...")("_skipped*")
if self.must_skip:
@ -2773,7 +2784,7 @@ class Word(Token):
try:
self.re = re.compile(self.reString)
except sre_constants.error:
except re.error:
self.re = None
else:
self.re_match = self.re.match
@ -2926,19 +2937,12 @@ class Regex(Token):
if not pattern:
raise ValueError("null string passed to Regex; use Empty() instead")
self.pattern = pattern
self._re = None
self.reString = self.pattern = pattern
self.flags = flags
try:
self.re = re.compile(self.pattern, self.flags)
self.reString = self.pattern
except sre_constants.error:
raise ValueError(
"invalid pattern ({!r}) passed to Regex".format(pattern)
)
elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
self.re = pattern
self._re = pattern
self.pattern = self.reString = pattern.pattern
self.flags = flags
@ -2947,11 +2951,8 @@ class Regex(Token):
"Regex may only be constructed with a string or a compiled RE object"
)
self.re_match = self.re.match
self.errmsg = "Expected " + self.name
self.mayIndexError = False
self.mayReturnEmpty = self.re_match("") is not None
self.asGroupList = asGroupList
self.asMatch = asMatch
if self.asGroupList:
@ -2959,6 +2960,26 @@ class Regex(Token):
if self.asMatch:
self.parseImpl = self.parseImplAsMatch
# Compiled regular expression for this Regex element, resolved on first access
# (the result is then cached on the instance by cached_property).
# - If a pre-compiled pattern object was stored in self._re, it is returned as-is.
# - Otherwise self.pattern is compiled with self.flags.
# Raises ValueError if re.compile rejects the pattern with re.error.
@cached_property
def re(self):
if self._re:
return self._re
else:
try:
return re.compile(self.pattern, self.flags)
except re.error:
raise ValueError(
"invalid pattern ({!r}) passed to Regex".format(self.pattern)
)
# The ``match`` method of the compiled pattern (self.re.match), looked up on
# first access; reading it forces the pattern in self.re to be resolved.
@cached_property
def re_match(self):
return self.re.match
# True when the pattern matches the empty string. Evaluated on first access by
# running self.re_match against "", which in turn resolves self.re.
@cached_property
def mayReturnEmpty(self):
return self.re_match("") is not None
def _generateDefaultName(self):
return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))
@ -3168,7 +3189,7 @@ class QuotedString(Token):
self.re = re.compile(self.pattern, self.flags)
self.reString = self.pattern
self.re_match = self.re.match
except sre_constants.error:
except re.error:
raise ValueError(
"invalid pattern {!r} passed to Regex".format(self.pattern)
)
@ -3826,7 +3847,9 @@ class And(ParseExpression):
seen.add(id(cur))
if isinstance(cur, IndentedBlock):
prev.add_parse_action(
lambda s, l, t, cur_=cur: setattr(cur_, "parent_anchor", col(l, s))
lambda s, l, t, cur_=cur: setattr(
cur_, "parent_anchor", col(l, s)
)
)
break
subs = cur.recurse()
@ -5002,20 +5025,20 @@ class SkipTo(ParseElementEnhance):
prints::
['101', 'Critical', 'Intermittent system crash', '6']
- days_open: 6
- desc: Intermittent system crash
- issue_num: 101
- sev: Critical
- days_open: '6'
- desc: 'Intermittent system crash'
- issue_num: '101'
- sev: 'Critical'
['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
- days_open: 14
- desc: Spelling error on Login ('log|n')
- issue_num: 94
- sev: Cosmetic
- days_open: '14'
- desc: "Spelling error on Login ('log|n')"
- issue_num: '94'
- sev: 'Cosmetic'
['79', 'Minor', 'System slow when running too many reports', '47']
- days_open: 47
- desc: System slow when running too many reports
- issue_num: 79
- sev: Minor
- days_open: '47'
- desc: 'System slow when running too many reports'
- issue_num: '79'
- sev: 'Minor'
"""
def __init__(
@ -5473,10 +5496,10 @@ class Dict(TokenConverter):
['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
[['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
- color: light blue
- posn: upper left
- shape: SQUARE
- texture: burlap
- color: 'light blue'
- posn: 'upper left'
- shape: 'SQUARE'
- texture: 'burlap'
SQUARE
{'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
@ -5564,13 +5587,13 @@ class Suppress(TokenConverter):
expr = _PendingSkip(NoMatch())
super().__init__(expr)
def __add__(self, other):
def __add__(self, other) -> "ParserElement":
if isinstance(self.expr, _PendingSkip):
return Suppress(SkipTo(other)) + other
else:
return super().__add__(other)
def __sub__(self, other):
def __sub__(self, other) -> "ParserElement":
if isinstance(self.expr, _PendingSkip):
return Suppress(SkipTo(other)) - other
else:

View File

@ -16,6 +16,7 @@ from jinja2 import Template
from io import StringIO
import inspect
with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
template = Template(fp.read())
@ -54,7 +55,7 @@ class AnnotatedItem(railroad.Group):
"""
def __init__(self, label: str, item):
super().__init__(item=item, label="[{}]".format(label))
super().__init__(item=item, label="[{}]".format(label) if label else label)
class EditablePartial(Generic[T]):
@ -137,6 +138,7 @@ def to_railroad(
diagram_kwargs: Optional[dict] = None,
vertical: int = 3,
show_results_names: bool = False,
show_groups: bool = False,
) -> List[NamedDiagram]:
"""
Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
@ -147,6 +149,8 @@ def to_railroad(
shown vertically instead of horizontally
:param show_results_names - bool to indicate whether results name annotations should be
included in the diagram
:param show_groups - bool to indicate whether groups should be highlighted with an unlabeled
surrounding box
"""
# Convert the whole tree underneath the root
lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
@ -156,6 +160,7 @@ def to_railroad(
parent=None,
vertical=vertical,
show_results_names=show_results_names,
show_groups=show_groups,
)
root_id = id(element)
@ -362,6 +367,7 @@ def _apply_diagram_item_enhancements(fn):
index: int = 0,
name_hint: str = None,
show_results_names: bool = False,
show_groups: bool = False,
) -> Optional[EditablePartial]:
ret = fn(
@ -372,6 +378,7 @@ def _apply_diagram_item_enhancements(fn):
index,
name_hint,
show_results_names,
show_groups,
)
# apply annotation for results name, if present
@ -411,6 +418,7 @@ def _to_diagram_element(
index: int = 0,
name_hint: str = None,
show_results_names: bool = False,
show_groups: bool = False,
) -> Optional[EditablePartial]:
"""
Recursively converts a PyParsing Element to a railroad Element
@ -423,6 +431,7 @@ def _to_diagram_element(
:param name_hint: If provided, this will override the generated name
:param show_results_names: bool flag indicating whether to add annotations for results names
:returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
:param show_groups: bool flag indicating whether to show groups using bounding box
"""
exprs = element.recurse()
name = name_hint or element.customName or element.__class__.__name__
@ -437,7 +446,7 @@ def _to_diagram_element(
if isinstance(
element,
(
pyparsing.TokenConverter,
# pyparsing.TokenConverter,
# pyparsing.Forward,
pyparsing.Located,
),
@ -457,6 +466,7 @@ def _to_diagram_element(
index=index,
name_hint=propagated_name,
show_results_names=show_results_names,
show_groups=show_groups,
)
# If the element isn't worth extracting, we always treat it as the first time we see it
@ -510,6 +520,13 @@ def _to_diagram_element(
ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
elif isinstance(element, pyparsing.PrecededBy):
ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
elif isinstance(element, pyparsing.Group):
if show_groups:
ret = EditablePartial.from_call(AnnotatedItem, label="", item="")
else:
ret = EditablePartial.from_call(railroad.Group, label="", item="")
elif isinstance(element, pyparsing.TokenConverter):
ret = EditablePartial.from_call(AnnotatedItem, label=type(element).__name__.lower(), item="")
elif isinstance(element, pyparsing.Opt):
ret = EditablePartial.from_call(railroad.Optional, item="")
elif isinstance(element, pyparsing.OneOrMore):
@ -558,6 +575,7 @@ def _to_diagram_element(
vertical=vertical,
index=i,
show_results_names=show_results_names,
show_groups=show_groups,
)
# Some elements don't need to be shown in the diagram

View File

@ -185,7 +185,9 @@ def match_previous_expr(expr: ParserElement) -> ParserElement:
def must_match_these_tokens(s, l, t):
theseTokens = _flatten(t.as_list())
if theseTokens != matchTokens:
raise ParseException(s, l, "Expected {}, found{}".format(matchTokens, theseTokens))
raise ParseException(
s, l, "Expected {}, found{}".format(matchTokens, theseTokens)
)
rep.set_parse_action(must_match_these_tokens, callDuringTry=True)
@ -310,7 +312,7 @@ def one_of(
return ret
except sre_constants.error:
except re.error:
warnings.warn(
"Exception creating Regex for one_of, building MatchFirst", stacklevel=2
)
@ -350,10 +352,10 @@ def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
prints::
[['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
- color: light blue
- posn: upper left
- shape: SQUARE
- texture: burlap
- color: 'light blue'
- posn: 'upper left'
- shape: 'SQUARE'
- texture: 'burlap'
SQUARE
SQUARE
{'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
@ -758,10 +760,14 @@ def infix_notation(
a tuple or list of functions, this is equivalent to calling
``set_parse_action(*fn)``
(:class:`ParserElement.set_parse_action`)
- ``lpar`` - expression for matching left-parentheses
(default= ``Suppress('(')``)
- ``rpar`` - expression for matching right-parentheses
(default= ``Suppress(')')``)
- ``lpar`` - expression for matching left-parentheses; if passed as a
str, then will be parsed as Suppress(lpar). If lpar is passed as
an expression (such as ``Literal('(')``), then it will be kept in
the parsed results, and grouped with them. (default= ``Suppress('(')``)
- ``rpar`` - expression for matching right-parentheses; if passed as a
str, then will be parsed as Suppress(rpar). If rpar is passed as
an expression (such as ``Literal(')')``), then it will be kept in
the parsed results, and grouped with them. (default= ``Suppress(')')``)
Example::
@ -803,9 +809,17 @@ def infix_notation(
_FB.__name__ = "FollowedBy>"
ret = Forward()
lpar = Suppress(lpar)
rpar = Suppress(rpar)
lastExpr = base_expr | (lpar + ret + rpar)
# A delimiter given as a plain string is matched but dropped from the results.
if isinstance(lpar, str):
    lpar = Suppress(lpar)
if isinstance(rpar, str):
    rpar = Suppress(rpar)

# If either delimiter is not suppressed it stays in the parsed results, so wrap
# the parenthesized sub-expression in a Group to keep the delimiters together
# with it. Both lpar AND rpar must be checked: testing rpar twice would leave
# an unsuppressed lpar ungrouped whenever rpar happens to be a Suppress.
if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
    lastExpr = base_expr | Group(lpar + ret + rpar)
else:
    lastExpr = base_expr | (lpar + ret + rpar)
for i, operDef in enumerate(op_list):
opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
if isinstance(opExpr, str_type):

View File

View File

@ -65,9 +65,9 @@ class ParseResults:
'month' in result -> True
'minutes' in result -> False
result.dump() -> ['1999', '/', '12', '/', '31']
- day: 31
- month: 12
- year: 1999
- day: '31'
- month: '12'
- year: '1999'
"""
_null_values: Tuple[Any, ...] = (None, [], "", ())
@ -301,7 +301,7 @@ class ParseResults:
prints::
['AAB', '123', '321']
- LABEL: AAB
- LABEL: 'AAB'
['AAB', '123', '321']
"""
@ -603,15 +603,15 @@ class ParseResults:
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parse_string('12/31/1999')
result = date_str.parse_string('1999/12/31')
print(result.dump())
prints::
['12', '/', '31', '/', '1999']
- day: 1999
- month: 31
- year: 12
['1999', '/', '12', '/', '31']
- day: '31'
- month: '12'
- year: '1999'
"""
out = []
NL = "\n"

View File

@ -7,7 +7,7 @@ msgpack==1.0.3
packaging==21.3
pep517==0.12.0
platformdirs==2.4.1
pyparsing==3.0.7
pyparsing==3.0.8
requests==2.27.1
certifi==2021.10.08
chardet==4.0.0