Merge pull request #7245 from chrahunt/refactor/simplify-req-file-parsing-2

Simplify requirement file parsing
2019-10-25 19:51:04 +08:00 · 2019-10-25 19:51:04 +08:00 · be01170ba5
parent c729a84b48 a5d53eab0a
commit be01170ba5
2 changed files with 97 additions and 68 deletions
--- a/src/pip/_internal/req/req_file.py
+++ b/src/pip/_internal/req/req_file.py
@ -31,9 +31,11 @@ from pip._internal.utils.typing import MYPY_CHECK_RUNNING
 from pip._internal.utils.urls import get_url_scheme

 if MYPY_CHECK_RUNNING:
+    from optparse import Values
    from typing import (
        Any, Callable, Iterator, List, NoReturn, Optional, Text, Tuple,
    )
+
    from pip._internal.req import InstallRequirement
    from pip._internal.cache import WheelCache
    from pip._internal.index.package_finder import PackageFinder
@ -41,6 +43,9 @@ if MYPY_CHECK_RUNNING:

    ReqFileLines = Iterator[Tuple[int, Text]]

+    LineParser = Callable[[Text], Tuple[str, Values]]
+
+
 __all__ = ['parse_requirements']

 SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
@ -112,7 +117,10 @@ def parse_requirements(
        filename, comes_from=comes_from, session=session
    )

-    lines_enum = preprocess(content, options)
+    skip_requirements_regex = (
+        options.skip_requirements_regex if options else None
+    )
+    lines_enum = preprocess(content, skip_requirements_regex)

    for line_number, line in lines_enum:
        req_iter = process_line(line, filename, line_number, finder,
@ -122,8 +130,8 @@ def parse_requirements(
            yield req


-def preprocess(content, options):
-    # type: (Text, Optional[optparse.Values]) -> ReqFileLines
+def preprocess(content, skip_requirements_regex):
+    # type: (Text, Optional[str]) -> ReqFileLines
    """Split, filter, and join lines, and return a line iterator

    :param content: the content of the requirements file
@ -132,7 +140,8 @@ def preprocess(content, options):
    lines_enum = enumerate(content.splitlines(), start=1)  # type: ReqFileLines
    lines_enum = join_lines(lines_enum)
    lines_enum = ignore_comments(lines_enum)
-    lines_enum = skip_regex(lines_enum, options)
+    if skip_requirements_regex:
+        lines_enum = skip_regex(lines_enum, skip_requirements_regex)
    lines_enum = expand_env_variables(lines_enum)
    return lines_enum

@ -167,19 +176,41 @@ def process_line(
    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
-    parser = build_parser(line)
-    defaults = parser.get_default_values()
-    defaults.index_url = None
-    if finder:
-        defaults.format_control = finder.format_control
-    args_str, options_str = break_args_options(line)
-    # Prior to 2.7.3, shlex cannot deal with unicode entries
-    if sys.version_info < (2, 7, 3):
-        # https://github.com/python/mypy/issues/1174
-        options_str = options_str.encode('utf8')  # type: ignore
-    # https://github.com/python/mypy/issues/1174
-    opts, _ = parser.parse_args(
-        shlex.split(options_str), defaults)  # type: ignore
+    line_parser = get_line_parser(finder)
+    try:
+        args_str, opts = line_parser(line)
+    except OptionParsingError as e:
+        # add offending line
+        msg = 'Invalid requirement: %s\n%s' % (line, e.msg)
+        raise RequirementsFileParseError(msg)
+
+    # parse a nested requirements file
+    if (
+        not args_str and
+        not opts.editables and
+        (opts.requirements or opts.constraints)
+    ):
+        if opts.requirements:
+            req_path = opts.requirements[0]
+            nested_constraint = False
+        else:
+            req_path = opts.constraints[0]
+            nested_constraint = True
+        # original file is over http
+        if SCHEME_RE.search(filename):
+            # do a url join so relative paths work
+            req_path = urllib_parse.urljoin(filename, req_path)
+        # original file and nested file are paths
+        elif not SCHEME_RE.search(req_path):
+            # do a join so relative paths work
+            req_path = os.path.join(os.path.dirname(filename), req_path)
+        parsed_reqs = parse_requirements(
+            req_path, finder, comes_from, options, session,
+            constraint=nested_constraint, wheel_cache=wheel_cache
+        )
+        for req in parsed_reqs:
+            yield req
+        return

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
@ -217,30 +248,6 @@ def process_line(
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

-    # parse a nested requirements file
-    elif opts.requirements or opts.constraints:
-        if opts.requirements:
-            req_path = opts.requirements[0]
-            nested_constraint = False
-        else:
-            req_path = opts.constraints[0]
-            nested_constraint = True
-        # original file is over http
-        if SCHEME_RE.search(filename):
-            # do a url join so relative paths work
-            req_path = urllib_parse.urljoin(filename, req_path)
-        # original file and nested file are paths
-        elif not SCHEME_RE.search(req_path):
-            # do a join so relative paths work
-            req_path = os.path.join(os.path.dirname(filename), req_path)
-        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
-        parsed_reqs = parse_requirements(
-            req_path, finder, comes_from, options, session,
-            constraint=nested_constraint, wheel_cache=wheel_cache
-        )
-        for req in parsed_reqs:
-            yield req
-
    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes
@ -279,6 +286,31 @@ def process_line(
            session.add_trusted_host(host, source=source)


+def get_line_parser(finder):
+    # type: (Optional[PackageFinder]) -> LineParser
+    parser = build_parser()
+    defaults = parser.get_default_values()
+    defaults.index_url = None
+    if finder:
+        defaults.format_control = finder.format_control
+
+    def parse_line(line):
+        # type: (Text) -> Tuple[str, Values]
+        args_str, options_str = break_args_options(line)
+        # Prior to 2.7.3, shlex cannot deal with unicode entries
+        if sys.version_info < (2, 7, 3):
+            # https://github.com/python/mypy/issues/1174
+            options_str = options_str.encode('utf8')  # type: ignore
+
+        # https://github.com/python/mypy/issues/1174
+        opts, _ = parser.parse_args(
+            shlex.split(options_str), defaults)  # type: ignore
+
+        return args_str, opts
+
+    return parse_line
+
+
 def break_args_options(line):
    # type: (Text) -> Tuple[str, Text]
    """Break up the line into an args and options string.  We only want to shlex
@ -297,8 +329,14 @@ def break_args_options(line):
    return ' '.join(args), ' '.join(options)  # type: ignore


-def build_parser(line):
-    # type: (Text) -> optparse.OptionParser
+class OptionParsingError(Exception):
+    def __init__(self, msg):
+        # type: (str) -> None
+        self.msg = msg
+
+
+def build_parser():
+    # type: () -> optparse.OptionParser
    """
    Return a parser for parsing requirement lines
    """
@ -313,9 +351,7 @@ def build_parser(line):
    # that in our own exception.
    def parser_exit(self, msg):
        # type: (Any, str) -> NoReturn
-        # add offending line
-        msg = 'Invalid requirement: %s\n%s' % (line, msg)
-        raise RequirementsFileParseError(msg)
+        raise OptionParsingError(msg)
    # NOTE: mypy disallows assigning to a method
    #       https://github.com/python/mypy/issues/2427
    parser.exit = parser_exit  # type: ignore
@ -365,17 +401,15 @@ def ignore_comments(lines_enum):
            yield line_number, line


-def skip_regex(lines_enum, options):
-    # type: (ReqFileLines, Optional[optparse.Values]) -> ReqFileLines
+def skip_regex(lines_enum, pattern):
+    # type: (ReqFileLines, str) -> ReqFileLines
    """
-    Skip lines that match '--skip-requirements-regex' pattern
+    Skip lines that match the provided pattern

    Note: the regex pattern is only built once
    """
-    skip_regex = options.skip_requirements_regex if options else None
-    if skip_regex:
-        pattern = re.compile(skip_regex)
-        lines_enum = filterfalse(lambda e: pattern.search(e[1]), lines_enum)
+    matcher = re.compile(pattern)
+    lines_enum = filterfalse(lambda e: matcher.search(e[1]), lines_enum)
    return lines_enum


--- a/tests/unit/test_req_file.py
+++ b/tests/unit/test_req_file.py
@ -83,8 +83,8 @@ class TestPreprocess(object):
          ern
          line2
        """)
-        options.skip_requirements_regex = 'pattern'
-        result = preprocess(content, options)
+        skip_requirements_regex = 'pattern'
+        result = preprocess(content, skip_requirements_regex)
        assert list(result) == [(3, 'line2')]

    def test_skip_regex_after_joining_case2(self, options):
@ -93,8 +93,8 @@ class TestPreprocess(object):
          line2
          line3
        """)
-        options.skip_requirements_regex = 'pattern'
-        result = preprocess(content, options)
+        skip_requirements_regex = 'pattern'
+        result = preprocess(content, skip_requirements_regex)
        assert list(result) == [(3, 'line3')]


@ -154,24 +154,19 @@ class TestSkipRegex(object):
    """tests for `skip_reqex``"""

    def test_skip_regex_pattern_match(self):
-        options = stub(skip_requirements_regex='.*Bad.*')
+        pattern = '.*Bad.*'
        line = '--extra-index-url Bad'
-        assert [] == list(skip_regex(enumerate([line]), options))
+        assert [] == list(skip_regex(enumerate([line]), pattern))

    def test_skip_regex_pattern_not_match(self):
-        options = stub(skip_requirements_regex='.*Bad.*')
+        pattern = '.*Bad.*'
        line = '--extra-index-url Good'
-        assert [(0, line)] == list(skip_regex(enumerate([line]), options))
+        assert [(0, line)] == list(skip_regex(enumerate([line]), pattern))

    def test_skip_regex_no_options(self):
-        options = None
+        pattern = None
        line = '--extra-index-url Good'
-        assert [(0, line)] == list(skip_regex(enumerate([line]), options))
-
-    def test_skip_regex_no_skip_option(self):
-        options = stub(skip_requirements_regex=None)
-        line = '--extra-index-url Good'
-        assert [(0, line)] == list(skip_regex(enumerate([line]), options))
+        assert [(1, line)] == list(preprocess(line, pattern))


 class TestProcessLine(object):