Upgrade chardet to 5.1.0

Pradyun Gedam 2023-01-28 20:41:43 +00:00
parent 1c110bede6
commit be20a75c10
No known key found for this signature in database
GPG Key ID: FF99710C4332258E
37 changed files with 620 additions and 287 deletions

news/chardet.vendor.rst Normal file
View File

@ -0,0 +1 @@
Upgrade chardet to 5.1.0

View File

@ -1 +0,0 @@
from chardet import *

View File

@ -15,19 +15,29 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetgroupprober import CharSetGroupProber
from .charsetprober import CharSetProber
from .enums import InputState
from .resultdict import ResultDict
from .universaldetector import UniversalDetector
from .version import VERSION, __version__
__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"]
def detect(byte_str):
def detect(
byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False
) -> ResultDict:
"""
Detect the encoding of the given byte string.
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
:param should_rename_legacy: Should we rename legacy encodings
to their more modern equivalents?
:type should_rename_legacy: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
@ -35,12 +45,16 @@ def detect(byte_str):
f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
)
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
detector.feed(byte_str)
return detector.close()
def detect_all(byte_str, ignore_threshold=False):
def detect_all(
byte_str: Union[bytes, bytearray],
ignore_threshold: bool = False,
should_rename_legacy: bool = False,
) -> List[ResultDict]:
"""
Detect all the possible encodings of the given byte string.
@ -50,6 +64,9 @@ def detect_all(byte_str, ignore_threshold=False):
``UniversalDetector.MINIMUM_THRESHOLD``
in results.
:type ignore_threshold: ``bool``
:param should_rename_legacy: Should we rename legacy encodings
to their more modern equivalents?
:type should_rename_legacy: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
@ -58,15 +75,15 @@ def detect_all(byte_str, ignore_threshold=False):
)
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
detector.feed(byte_str)
detector.close()
if detector.input_state == InputState.HIGH_BYTE:
results = []
probers = []
results: List[ResultDict] = []
probers: List[CharSetProber] = []
for prober in detector.charset_probers:
if hasattr(prober, "probers"):
if isinstance(prober, CharSetGroupProber):
probers.extend(p for p in prober.probers)
else:
probers.append(prober)
@ -80,6 +97,11 @@ def detect_all(byte_str, ignore_threshold=False):
charset_name = detector.ISO_WIN_MAP.get(
lower_charset_name, charset_name
)
# Rename legacy encodings with superset encodings if asked
if should_rename_legacy:
charset_name = detector.LEGACY_MAP.get(
charset_name.lower(), charset_name
)
results.append(
{
"encoding": charset_name,

View File

@ -32,16 +32,16 @@ from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "Big5"
@property
def language(self):
def language(self) -> str:
return "Chinese"

View File

@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Tuple, Union
from .big5freq import (
BIG5_CHAR_TO_FREQ_ORDER,
BIG5_TABLE_SIZE,
@ -59,22 +61,22 @@ class CharDistributionAnalysis:
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
def __init__(self):
def __init__(self) -> None:
# Mapping table to get frequency order from char order (get from
# GetOrder())
self._char_to_freq_order = tuple()
self._table_size = None # Size of above table
self._char_to_freq_order: Tuple[int, ...] = tuple()
self._table_size = 0 # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
self.typical_distribution_ratio = None
self._done = None
self._total_chars = None
self._freq_chars = None
self.typical_distribution_ratio = 0.0
self._done = False
self._total_chars = 0
self._freq_chars = 0
self.reset()
def reset(self):
def reset(self) -> None:
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
@ -83,7 +85,7 @@ class CharDistributionAnalysis:
# The number of characters whose frequency order is less than 512
self._freq_chars = 0
def feed(self, char, char_len):
def feed(self, char: Union[bytes, bytearray], char_len: int) -> None:
"""feed a character with known length"""
if char_len == 2:
# we only care about 2-bytes character in our distribution analysis
@ -97,7 +99,7 @@ class CharDistributionAnalysis:
if 512 > self._char_to_freq_order[order]:
self._freq_chars += 1
def get_confidence(self):
def get_confidence(self) -> float:
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
@ -114,12 +116,12 @@ class CharDistributionAnalysis:
# normalize confidence (we don't want to be 100% sure)
return self.SURE_YES
def got_enough_data(self):
def got_enough_data(self) -> bool:
# It is not necessary to receive all data to draw conclusion.
# For charset detection, a certain amount of data is enough
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
def get_order(self, _):
def get_order(self, _: Union[bytes, bytearray]) -> int:
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
@ -128,13 +130,13 @@ class CharDistributionAnalysis:
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@ -146,13 +148,13 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@ -164,13 +166,13 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
class JOHABDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
first_char = byte_str[0]
if 0x88 <= first_char < 0xD4:
code = first_char * 256 + byte_str[1]
@ -179,13 +181,13 @@ class JOHABDistributionAnalysis(CharDistributionAnalysis):
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@ -197,13 +199,13 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
@ -217,13 +219,13 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
@ -242,13 +244,13 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe

View File

@ -25,29 +25,30 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Optional, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
from .enums import LanguageFilter, ProbingState
class CharSetGroupProber(CharSetProber):
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self._active_num = 0
self.probers = []
self._best_guess_prober = None
self.probers: List[CharSetProber] = []
self._best_guess_prober: Optional[CharSetProber] = None
def reset(self):
def reset(self) -> None:
super().reset()
self._active_num = 0
for prober in self.probers:
if prober:
prober.reset()
prober.active = True
self._active_num += 1
prober.reset()
prober.active = True
self._active_num += 1
self._best_guess_prober = None
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
@ -55,17 +56,15 @@ class CharSetGroupProber(CharSetProber):
return self._best_guess_prober.charset_name
@property
def language(self):
def language(self) -> Optional[str]:
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.language
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for prober in self.probers:
if not prober:
continue
if not prober.active:
continue
state = prober.feed(byte_str)
@ -83,7 +82,7 @@ class CharSetGroupProber(CharSetProber):
return self.state
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
@ -92,8 +91,6 @@ class CharSetGroupProber(CharSetProber):
best_conf = 0.0
self._best_guess_prober = None
for prober in self.probers:
if not prober:
continue
if not prober.active:
self.logger.debug("%s not active", prober.charset_name)
continue

View File

@ -28,8 +28,9 @@
import logging
import re
from typing import Optional, Union
from .enums import ProbingState
from .enums import LanguageFilter, ProbingState
INTERNATIONAL_WORDS_PATTERN = re.compile(
b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?"
@ -40,35 +41,40 @@ class CharSetProber:
SHORTCUT_THRESHOLD = 0.95
def __init__(self, lang_filter=None):
self._state = None
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
self._state = ProbingState.DETECTING
self.active = True
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
def reset(self):
def reset(self) -> None:
self._state = ProbingState.DETECTING
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
return None
def feed(self, byte_str):
@property
def language(self) -> Optional[str]:
raise NotImplementedError
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
raise NotImplementedError
@property
def state(self):
def state(self) -> ProbingState:
return self._state
def get_confidence(self):
def get_confidence(self) -> float:
return 0.0
@staticmethod
def filter_high_byte_only(buf):
def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes:
buf = re.sub(b"([\x00-\x7F])+", b" ", buf)
return buf
@staticmethod
def filter_international_words(buf):
def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray:
"""
We define three types of bytes:
alphabet: English letters [a-zA-Z]
@ -102,7 +108,7 @@ class CharSetProber:
return filtered
@staticmethod
def remove_xml_tags(buf):
def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes:
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
@ -117,10 +123,13 @@ class CharSetProber:
for curr, buf_char in enumerate(buf):
# Check if we're coming out of or entering an XML tag
if buf_char == b">":
# https://github.com/python/typeshed/issues/8182
if buf_char == b">": # type: ignore[comparison-overlap]
prev = curr + 1
in_tag = False
elif buf_char == b"<":
# https://github.com/python/typeshed/issues/8182
elif buf_char == b"<": # type: ignore[comparison-overlap]
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character

View File

@ -15,12 +15,18 @@ If no paths are provided, it takes its input from stdin.
import argparse
import sys
from typing import Iterable, List, Optional
from .. import __version__
from ..universaldetector import UniversalDetector
def description_of(lines, name="stdin"):
def description_of(
lines: Iterable[bytes],
name: str = "stdin",
minimal: bool = False,
should_rename_legacy: bool = False,
) -> Optional[str]:
"""
Return a string describing the probable encoding of a file or
list of strings.
@ -29,8 +35,11 @@ def description_of(lines, name="stdin"):
:type lines: Iterable of bytes
:param name: Name of file or collection of lines
:type name: str
:param should_rename_legacy: Should we rename legacy encodings to
their more modern equivalents?
:type should_rename_legacy: ``bool``
"""
u = UniversalDetector()
u = UniversalDetector(should_rename_legacy=should_rename_legacy)
for line in lines:
line = bytearray(line)
u.feed(line)
@ -39,12 +48,14 @@ def description_of(lines, name="stdin"):
break
u.close()
result = u.result
if minimal:
return result["encoding"]
if result["encoding"]:
return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
return f"{name}: no result"
def main(argv=None):
def main(argv: Optional[List[str]] = None) -> None:
"""
Handles command line arguments and gets things started.
@ -54,17 +65,28 @@ def main(argv=None):
"""
# Get command line arguments
parser = argparse.ArgumentParser(
description="Takes one or more file paths and reports their detected \
encodings"
description=(
"Takes one or more file paths and reports their detected encodings"
)
)
parser.add_argument(
"input",
help="File whose encoding we would like to determine. \
(default: stdin)",
help="File whose encoding we would like to determine. (default: stdin)",
type=argparse.FileType("rb"),
nargs="*",
default=[sys.stdin.buffer],
)
parser.add_argument(
"--minimal",
help="Print only the encoding to standard output",
action="store_true",
)
parser.add_argument(
"-l",
"--legacy",
help="Rename legacy encodings to more modern ones.",
action="store_true",
)
parser.add_argument(
"--version", action="version", version=f"%(prog)s {__version__}"
)
@ -79,7 +101,11 @@ def main(argv=None):
"--help\n",
file=sys.stderr,
)
print(description_of(f, f.name))
print(
description_of(
f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
)
)
if __name__ == "__main__":
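
A short sketch of how the two new options surface in practice, driving the module directly instead of the console script (the input file name is hypothetical):

from chardet.cli.chardetect import description_of, main

# Programmatic equivalent of `chardetect --minimal --legacy FILE`:
with open("some_file.txt", "rb") as f:  # hypothetical input file
    print(description_of(f, f.name, minimal=True, should_rename_legacy=True))

# Or hand an argv list straight to main(); it falls back to sys.argv
# when called with no arguments.
main(["--minimal", "--legacy", "some_file.txt"])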

View File

@ -27,6 +27,7 @@
import logging
from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
@ -53,18 +54,19 @@ class CodingStateMachine:
encoding from consideration from here on.
"""
def __init__(self, sm):
def __init__(self, sm: CodingStateMachineDict) -> None:
self._model = sm
self._curr_byte_pos = 0
self._curr_char_len = 0
self._curr_state = None
self._curr_state = MachineState.START
self.active = True
self.logger = logging.getLogger(__name__)
self.reset()
def reset(self):
def reset(self) -> None:
self._curr_state = MachineState.START
def next_state(self, c):
def next_state(self, c: int) -> int:
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model["class_table"][c]
@ -77,12 +79,12 @@ class CodingStateMachine:
self._curr_byte_pos += 1
return self._curr_state
def get_current_charlen(self):
def get_current_charlen(self) -> int:
return self._curr_char_len
def get_coding_state_machine(self):
def get_coding_state_machine(self) -> str:
return self._model["name"]
@property
def language(self):
def language(self) -> str:
return self._model["language"]

View File

@ -0,0 +1,19 @@
from typing import TYPE_CHECKING, Tuple
if TYPE_CHECKING:
# TypedDict was introduced in Python 3.8.
#
# TODO: Remove the else block and TYPE_CHECKING check when dropping support
# for Python 3.7.
from typing import TypedDict
class CodingStateMachineDict(TypedDict, total=False):
class_table: Tuple[int, ...]
class_factor: int
state_table: Tuple[int, ...]
char_len_table: Tuple[int, ...]
name: str
language: str # Optional key
else:
CodingStateMachineDict = dict

View File

@ -32,7 +32,7 @@ from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
@ -41,9 +41,9 @@ class CP949Prober(MultiByteCharSetProber):
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "CP949"
@property
def language(self):
def language(self) -> str:
return "Korean"

View File

@ -4,6 +4,8 @@ All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
from enum import Enum, Flag
class InputState:
"""
@ -15,12 +17,13 @@ class InputState:
HIGH_BYTE = 2
class LanguageFilter:
class LanguageFilter(Flag):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
NONE = 0x00
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
@ -31,7 +34,7 @@ class LanguageFilter:
CJK = CHINESE | JAPANESE | KOREAN
class ProbingState:
class ProbingState(Enum):
"""
This enum represents the different states a prober can be in.
"""
@ -62,7 +65,7 @@ class SequenceLikelihood:
POSITIVE = 3
@classmethod
def get_num_categories(cls):
def get_num_categories(cls) -> int:
""":returns: The number of likelihood categories in the enum."""
return 4
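
Because LanguageFilter is now a real enum.Flag, the bitwise composition and membership tests used throughout the probers keep their meaning while gaining type safety. A short sketch:

from chardet.enums import LanguageFilter, ProbingState

# Flags compose with |, exactly as the CJK alias above is built.
wanted = LanguageFilter.CHINESE_SIMPLIFIED | LanguageFilter.JAPANESE

# Membership checks use &, as in EscCharSetProber.__init__ below.
if wanted & LanguageFilter.JAPANESE:
    print("Japanese probers enabled")

# ProbingState is a plain Enum, so states compare by identity.
assert ProbingState.DETECTING is not ProbingState.FOUND_IT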

View File

@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Optional, Union
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, MachineState, ProbingState
@ -43,7 +45,7 @@ class EscCharSetProber(CharSetProber):
identify these encodings.
"""
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
@ -53,17 +55,15 @@ class EscCharSetProber(CharSetProber):
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
self.active_sm_count = None
self._detected_charset = None
self._detected_language = None
self._state = None
self.active_sm_count = 0
self._detected_charset: Optional[str] = None
self._detected_language: Optional[str] = None
self._state = ProbingState.DETECTING
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
for coding_sm in self.coding_sm:
if not coding_sm:
continue
coding_sm.active = True
coding_sm.reset()
self.active_sm_count = len(self.coding_sm)
@ -71,20 +71,20 @@ class EscCharSetProber(CharSetProber):
self._detected_language = None
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
return self._detected_charset
@property
def language(self):
def language(self) -> Optional[str]:
return self._detected_language
def get_confidence(self):
def get_confidence(self) -> float:
return 0.99 if self._detected_charset else 0.00
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
for coding_sm in self.coding_sm:
if not coding_sm or not coding_sm.active:
if not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.ERROR:

View File

@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
# fmt: off
@ -75,7 +76,7 @@ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
HZ_SM_MODEL = {
HZ_SM_MODEL: CodingStateMachineDict = {
"class_table": HZ_CLS,
"class_factor": 6,
"state_table": HZ_ST,
@ -134,7 +135,7 @@ ISO2022CN_ST = (
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022CN_SM_MODEL = {
ISO2022CN_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022CN_CLS,
"class_factor": 9,
"state_table": ISO2022CN_ST,
@ -194,7 +195,7 @@ ISO2022JP_ST = (
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022JP_SM_MODEL = {
ISO2022JP_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022JP_CLS,
"class_factor": 10,
"state_table": ISO2022JP_ST,
@ -250,7 +251,7 @@ ISO2022KR_ST = (
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
ISO2022KR_SM_MODEL = {
ISO2022KR_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022KR_CLS,
"class_factor": 6,
"state_table": ISO2022KR_ST,

View File

@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Union
from .chardistribution import EUCJPDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@ -34,26 +36,29 @@ from .mbcssm import EUCJP_SM_MODEL
class EUCJPProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
self.distribution_analyzer = EUCJPDistributionAnalysis()
self.context_analyzer = EUCJPContextAnalysis()
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.context_analyzer.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "EUC-JP"
@property
def language(self):
def language(self) -> str:
return "Japanese"
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
assert self.coding_sm is not None
assert self.distribution_analyzer is not None
for i, byte in enumerate(byte_str):
# PY3K: byte_str is a byte array, so byte is an int, not a byte
coding_state = self.coding_sm.next_state(byte)
@ -89,7 +94,9 @@ class EUCJPProber(MultiByteCharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
assert self.distribution_analyzer is not None
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)

View File

@ -32,16 +32,16 @@ from .mbcssm import EUCKR_SM_MODEL
class EUCKRProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "EUC-KR"
@property
def language(self):
def language(self) -> str:
return "Korean"

View File

@ -32,16 +32,16 @@ from .mbcssm import EUCTW_SM_MODEL
class EUCTWProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
self.distribution_analyzer = EUCTWDistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "EUC-TW"
@property
def language(self):
def language(self) -> str:
return "Taiwan"

View File

@ -32,16 +32,16 @@ from .mbcssm import GB2312_SM_MODEL
class GB2312Prober(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
self.distribution_analyzer = GB2312DistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "GB2312"
@property
def language(self):
def language(self) -> str:
return "Chinese"

View File

@ -25,8 +25,11 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Optional, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
from .sbcharsetprober import SingleByteCharSetProber
# This prober doesn't actually recognize a language or a charset.
# It is a helper prober used by the Hebrew model probers
@ -127,6 +130,7 @@ from .enums import ProbingState
class HebrewProber(CharSetProber):
SPACE = 0x20
# windows-1255 / ISO-8859-8 code points of interest
FINAL_KAF = 0xEA
NORMAL_KAF = 0xEB
@ -152,31 +156,35 @@ class HebrewProber(CharSetProber):
VISUAL_HEBREW_NAME = "ISO-8859-8"
LOGICAL_HEBREW_NAME = "windows-1255"
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._final_char_logical_score = None
self._final_char_visual_score = None
self._prev = None
self._before_prev = None
self._logical_prober = None
self._visual_prober = None
self._final_char_logical_score = 0
self._final_char_visual_score = 0
self._prev = self.SPACE
self._before_prev = self.SPACE
self._logical_prober: Optional[SingleByteCharSetProber] = None
self._visual_prober: Optional[SingleByteCharSetProber] = None
self.reset()
def reset(self):
def reset(self) -> None:
self._final_char_logical_score = 0
self._final_char_visual_score = 0
# The two last characters seen in the previous buffer,
# mPrev and mBeforePrev are initialized to space in order to simulate
# a word delimiter at the beginning of the data
self._prev = " "
self._before_prev = " "
self._prev = self.SPACE
self._before_prev = self.SPACE
# These probers are owned by the group prober.
def set_model_probers(self, logical_prober, visual_prober):
def set_model_probers(
self,
logical_prober: SingleByteCharSetProber,
visual_prober: SingleByteCharSetProber,
) -> None:
self._logical_prober = logical_prober
self._visual_prober = visual_prober
def is_final(self, c):
def is_final(self, c: int) -> bool:
return c in [
self.FINAL_KAF,
self.FINAL_MEM,
@ -185,7 +193,7 @@ class HebrewProber(CharSetProber):
self.FINAL_TSADI,
]
def is_non_final(self, c):
def is_non_final(self, c: int) -> bool:
# The normal Tsadi is not a good Non-Final letter due to words like
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
# apostrophe is converted to a space in FilterWithoutEnglishLetters
@ -198,7 +206,7 @@ class HebrewProber(CharSetProber):
# since these words are quite rare.
return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE]
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# Final letter analysis for logical-visual decision.
# Look for evidence that the received buffer is either logical Hebrew
# or visual Hebrew.
@ -232,9 +240,9 @@ class HebrewProber(CharSetProber):
byte_str = self.filter_high_byte_only(byte_str)
for cur in byte_str:
if cur == " ":
if cur == self.SPACE:
# We stand on a space - a word just ended
if self._before_prev != " ":
if self._before_prev != self.SPACE:
# next-to-last char was not a space so self._prev is not a
# 1 letter word
if self.is_final(self._prev):
@ -247,9 +255,9 @@ class HebrewProber(CharSetProber):
else:
# Not standing on a space
if (
(self._before_prev == " ")
(self._before_prev == self.SPACE)
and (self.is_final(self._prev))
and (cur != " ")
and (cur != self.SPACE)
):
# case (3) [-2:space][-1:final letter][cur:not space]
self._final_char_visual_score += 1
@ -261,7 +269,10 @@ class HebrewProber(CharSetProber):
return ProbingState.DETECTING
@property
def charset_name(self):
def charset_name(self) -> str:
assert self._logical_prober is not None
assert self._visual_prober is not None
# Make the decision: is it Logical or Visual?
# If the final letter score distance is dominant enough, rely on it.
finalsub = self._final_char_logical_score - self._final_char_visual_score
@ -289,11 +300,14 @@ class HebrewProber(CharSetProber):
return self.LOGICAL_HEBREW_NAME
@property
def language(self):
def language(self) -> str:
return "Hebrew"
@property
def state(self):
def state(self) -> ProbingState:
assert self._logical_prober is not None
assert self._visual_prober is not None
# Remain active as long as any of the model probers are active.
if (self._logical_prober.state == ProbingState.NOT_ME) and (
self._visual_prober.state == ProbingState.NOT_ME

View File

@ -32,16 +32,16 @@ from .mbcssm import JOHAB_SM_MODEL
class JOHABProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL)
self.distribution_analyzer = JOHABDistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "Johab"
@property
def language(self):
def language(self) -> str:
return "Korean"

View File

@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Tuple, Union
# This is the hiragana 2-char sequence table; the number in each cell represents its frequency category
# fmt: off
@ -123,15 +124,15 @@ class JapaneseContextAnalysis:
MAX_REL_THRESHOLD = 1000
MINIMUM_DATA_THRESHOLD = 4
def __init__(self):
self._total_rel = None
self._rel_sample = None
self._need_to_skip_char_num = None
self._last_char_order = None
self._done = None
def __init__(self) -> None:
self._total_rel = 0
self._rel_sample: List[int] = []
self._need_to_skip_char_num = 0
self._last_char_order = -1
self._done = False
self.reset()
def reset(self):
def reset(self) -> None:
self._total_rel = 0 # total sequence received
# category counters, each integer counts sequence in its category
self._rel_sample = [0] * self.NUM_OF_CATEGORY
@ -143,7 +144,7 @@ class JapaneseContextAnalysis:
# been made
self._done = False
def feed(self, byte_str, num_bytes):
def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None:
if self._done:
return
@ -172,29 +173,29 @@ class JapaneseContextAnalysis:
] += 1
self._last_char_order = order
def got_enough_data(self):
def got_enough_data(self) -> bool:
return self._total_rel > self.ENOUGH_REL_THRESHOLD
def get_confidence(self):
def get_confidence(self) -> float:
# This is just one way to calculate confidence. It works well for me.
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
return (self._total_rel - self._rel_sample[0]) / self._total_rel
return self.DONT_KNOW
def get_order(self, _):
def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]:
return -1, 1
class SJISContextAnalysis(JapaneseContextAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._charset_name = "SHIFT_JIS"
@property
def charset_name(self):
def charset_name(self) -> str:
return self._charset_name
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
if not byte_str:
return -1, 1
# find out current char's byte length
@ -216,7 +217,7 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
class EUCJPContextAnalysis(JapaneseContextAnalysis):
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
if not byte_str:
return -1, 1
# find out current char's byte length

View File

@ -26,6 +26,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
@ -96,26 +98,26 @@ Latin1ClassModel = (
class Latin1Prober(CharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._last_char_class = None
self._freq_counter = None
self._last_char_class = OTH
self._freq_counter: List[int] = []
self.reset()
def reset(self):
def reset(self) -> None:
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
super().reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "ISO-8859-1"
@property
def language(self):
def language(self) -> str:
return ""
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
@ -128,7 +130,7 @@ class Latin1Prober(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
if self.state == ProbingState.NOT_ME:
return 0.01

View File

@ -0,0 +1,162 @@
######################## BEGIN LICENSE BLOCK ########################
# This code was modified from latin1prober.py by Rob Speer <rob@lumino.so>.
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Rob Speer - adapt to MacRoman encoding
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
FREQ_CAT_NUM = 4
UDF = 0 # undefined
OTH = 1 # other
ASC = 2 # ascii capital letter
ASS = 3 # ascii small letter
ACV = 4 # accent capital vowel
ACO = 5 # accent capital other
ASV = 6 # accent small vowel
ASO = 7 # accent small other
ODD = 8 # character that is unlikely to appear
CLASS_NUM = 9 # total classes
# The change from Latin1 is that we explicitly look for extended characters
# that are infrequently-occurring symbols, and consider them to always be
# improbable. This should let MacRoman get out of the way of more likely
# encodings in most situations.
# fmt: off
MacRoman_CharToClass = (
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87
ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7
OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF
OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7
OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF
OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7
ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF
OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF
ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7
ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF
)
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
MacRomanClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO ODD
0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF
0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH
0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC
0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS
0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV
0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO
0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV
0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO
0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD
)
# fmt: on
class MacRomanProber(CharSetProber):
def __init__(self) -> None:
super().__init__()
self._last_char_class = OTH
self._freq_counter: List[int] = []
self.reset()
def reset(self) -> None:
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
# express the prior that MacRoman is a somewhat rare encoding;
# this can be done by starting out in a slightly improbable state
# that must be overcome
self._freq_counter[2] = 10
super().reset()
@property
def charset_name(self) -> str:
return "MacRoman"
@property
def language(self) -> str:
return ""
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = MacRoman_CharToClass[c]
freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class]
if freq == 0:
self._state = ProbingState.NOT_ME
break
self._freq_counter[freq] += 1
self._last_char_class = char_class
return self.state
def get_confidence(self) -> float:
if self.state == ProbingState.NOT_ME:
return 0.01
total = sum(self._freq_counter)
confidence = (
0.0
if total < 0.01
else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
)
confidence = max(confidence, 0.0)
# lower the confidence of MacRoman so that other, more accurate
# detectors can take priority.
confidence *= 0.73
return confidence
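
A quick sketch of exercising the new prober directly (this is internal API; the sample below uses 0x8E, which is 'é' in MacRoman, and is purely illustrative):

from chardet.macromanprober import MacRomanProber

prober = MacRomanProber()
prober.feed(b"r\x8esum\x8e writers at the caf\x8e")
print(prober.charset_name, prober.get_confidence())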

View File

@ -27,8 +27,12 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Optional, Union
from .chardistribution import CharDistributionAnalysis
from .charsetprober import CharSetProber
from .enums import MachineState, ProbingState
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, MachineState, ProbingState
class MultiByteCharSetProber(CharSetProber):
@ -36,29 +40,24 @@ class MultiByteCharSetProber(CharSetProber):
MultiByteCharSetProber
"""
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.distribution_analyzer = None
self.coding_sm = None
self._last_char = [0, 0]
self.distribution_analyzer: Optional[CharDistributionAnalysis] = None
self.coding_sm: Optional[CodingStateMachine] = None
self._last_char = bytearray(b"\0\0")
def reset(self):
def reset(self) -> None:
super().reset()
if self.coding_sm:
self.coding_sm.reset()
if self.distribution_analyzer:
self.distribution_analyzer.reset()
self._last_char = [0, 0]
self._last_char = bytearray(b"\0\0")
@property
def charset_name(self):
raise NotImplementedError
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
assert self.coding_sm is not None
assert self.distribution_analyzer is not None
@property
def language(self):
raise NotImplementedError
def feed(self, byte_str):
for i, byte in enumerate(byte_str):
coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
@ -91,5 +90,6 @@ class MultiByteCharSetProber(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
assert self.distribution_analyzer is not None
return self.distribution_analyzer.get_confidence()

View File

@ -30,6 +30,7 @@
from .big5prober import Big5Prober
from .charsetgroupprober import CharSetGroupProber
from .cp949prober import CP949Prober
from .enums import LanguageFilter
from .eucjpprober import EUCJPProber
from .euckrprober import EUCKRProber
from .euctwprober import EUCTWProber
@ -40,7 +41,7 @@ from .utf8prober import UTF8Prober
class MBCSGroupProber(CharSetGroupProber):
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.probers = [
UTF8Prober(),

View File

@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
# BIG5
@ -74,7 +75,7 @@ BIG5_ST = (
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
BIG5_SM_MODEL = {
BIG5_SM_MODEL: CodingStateMachineDict = {
"class_table": BIG5_CLS,
"class_factor": 5,
"state_table": BIG5_ST,
@ -117,7 +118,7 @@ CP949_ST = (
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
CP949_SM_MODEL = {
CP949_SM_MODEL: CodingStateMachineDict = {
"class_table": CP949_CLS,
"class_factor": 10,
"state_table": CP949_ST,
@ -173,7 +174,7 @@ EUCJP_ST = (
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
EUCJP_SM_MODEL = {
EUCJP_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCJP_CLS,
"class_factor": 6,
"state_table": EUCJP_ST,
@ -226,7 +227,7 @@ EUCKR_ST = (
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
EUCKR_SM_MODEL = {
EUCKR_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCKR_CLS,
"class_factor": 4,
"state_table": EUCKR_ST,
@ -283,7 +284,7 @@ JOHAB_ST = (
JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)
JOHAB_SM_MODEL = {
JOHAB_SM_MODEL: CodingStateMachineDict = {
"class_table": JOHAB_CLS,
"class_factor": 10,
"state_table": JOHAB_ST,
@ -340,7 +341,7 @@ EUCTW_ST = (
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
EUCTW_SM_MODEL = {
EUCTW_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCTW_CLS,
"class_factor": 7,
"state_table": EUCTW_ST,
@ -402,7 +403,7 @@ GB2312_ST = (
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
GB2312_SM_MODEL = {
GB2312_SM_MODEL: CodingStateMachineDict = {
"class_table": GB2312_CLS,
"class_factor": 7,
"state_table": GB2312_ST,
@ -458,7 +459,7 @@ SJIS_ST = (
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
SJIS_SM_MODEL = {
SJIS_SM_MODEL: CodingStateMachineDict = {
"class_table": SJIS_CLS,
"class_factor": 6,
"state_table": SJIS_ST,
@ -516,7 +517,7 @@ UCS2BE_ST = (
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
UCS2BE_SM_MODEL = {
UCS2BE_SM_MODEL: CodingStateMachineDict = {
"class_table": UCS2BE_CLS,
"class_factor": 6,
"state_table": UCS2BE_ST,
@ -574,7 +575,7 @@ UCS2LE_ST = (
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
UCS2LE_SM_MODEL = {
UCS2LE_SM_MODEL: CodingStateMachineDict = {
"class_table": UCS2LE_CLS,
"class_factor": 6,
"state_table": UCS2LE_ST,
@ -651,7 +652,7 @@ UTF8_ST = (
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
UTF8_SM_MODEL = {
UTF8_SM_MODEL: CodingStateMachineDict = {
"class_table": UTF8_CLS,
"class_factor": 16,
"state_table": UTF8_ST,

View File

@ -6,6 +6,7 @@ This code is based on the language metadata from the uchardet project.
"""
from string import ascii_letters
from typing import List, Optional
# TODO: Add Ukrainian (KOI8-U)
@ -33,13 +34,13 @@ class Language:
def __init__(
self,
name=None,
iso_code=None,
use_ascii=True,
charsets=None,
alphabet=None,
wiki_start_pages=None,
):
name: Optional[str] = None,
iso_code: Optional[str] = None,
use_ascii: bool = True,
charsets: Optional[List[str]] = None,
alphabet: Optional[str] = None,
wiki_start_pages: Optional[List[str]] = None,
) -> None:
super().__init__()
self.name = name
self.iso_code = iso_code
@ -55,7 +56,7 @@ class Language:
self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None
self.wiki_start_pages = wiki_start_pages
def __repr__(self):
def __repr__(self) -> str:
param_str = ", ".join(
f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
)
@ -103,7 +104,7 @@ LANGUAGES = {
name="Danish",
iso_code="da",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="æøåÆØÅ",
wiki_start_pages=["Forside"],
),
@ -111,8 +112,8 @@ LANGUAGES = {
name="German",
iso_code="de",
use_ascii=True,
charsets=["ISO-8859-1", "WINDOWS-1252"],
alphabet="äöüßÄÖÜ",
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="äöüßÄÖÜ",
wiki_start_pages=["Wikipedia:Hauptseite"],
),
"Greek": Language(
@ -127,7 +128,7 @@ LANGUAGES = {
name="English",
iso_code="en",
use_ascii=True,
charsets=["ISO-8859-1", "WINDOWS-1252"],
charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
wiki_start_pages=["Main_Page"],
),
"Esperanto": Language(
@ -143,7 +144,7 @@ LANGUAGES = {
name="Spanish",
iso_code="es",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ñáéíóúüÑÁÉÍÓÚÜ",
wiki_start_pages=["Wikipedia:Portada"],
),
@ -161,7 +162,7 @@ LANGUAGES = {
name="Finnish",
iso_code="fi",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÅÄÖŠŽåäöšž",
wiki_start_pages=["Wikipedia:Etusivu"],
),
@ -169,7 +170,7 @@ LANGUAGES = {
name="French",
iso_code="fr",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ",
wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"],
),
@ -203,7 +204,7 @@ LANGUAGES = {
name="Italian",
iso_code="it",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÀÈÉÌÒÓÙàèéìòóù",
wiki_start_pages=["Pagina_principale"],
),
@ -237,7 +238,7 @@ LANGUAGES = {
name="Dutch",
iso_code="nl",
use_ascii=True,
charsets=["ISO-8859-1", "WINDOWS-1252"],
charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
wiki_start_pages=["Hoofdpagina"],
),
"Polish": Language(
@ -253,7 +254,7 @@ LANGUAGES = {
name="Portuguese",
iso_code="pt",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú",
wiki_start_pages=["Wikipédia:Página_principal"],
),

View File

@ -0,0 +1,16 @@
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
# TypedDict was introduced in Python 3.8.
#
# TODO: Remove the else block and TYPE_CHECKING check when dropping support
# for Python 3.7.
from typing import TypedDict
class ResultDict(TypedDict):
encoding: Optional[str]
confidence: float
language: Optional[str]
else:
ResultDict = dict
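
ResultDict exists purely for type checkers (hence the TYPE_CHECKING guard); at runtime it is a plain dict. A sketch of what the annotation buys under a checker such as mypy (sample bytes are illustrative):

from chardet import detect
from chardet.resultdict import ResultDict

result: ResultDict = detect(b"\xc3\xa9chantillon")
encoding = result["encoding"]      # Optional[str] to the type checker
confidence = result["confidence"]  # float
# result["typo_key"]               # a checker would reject this key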

View File

@ -26,23 +26,20 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from collections import namedtuple
from typing import Dict, List, NamedTuple, Optional, Union
from .charsetprober import CharSetProber
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
SingleByteCharSetModel = namedtuple(
"SingleByteCharSetModel",
[
"charset_name",
"language",
"char_to_order_map",
"language_model",
"typical_positive_ratio",
"keep_ascii_letters",
"alphabet",
],
)
class SingleByteCharSetModel(NamedTuple):
charset_name: str
language: str
char_to_order_map: Dict[int, int]
language_model: Dict[int, Dict[int, int]]
typical_positive_ratio: float
keep_ascii_letters: bool
alphabet: str
class SingleByteCharSetProber(CharSetProber):
@ -51,22 +48,27 @@ class SingleByteCharSetProber(CharSetProber):
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
def __init__(self, model, is_reversed=False, name_prober=None):
def __init__(
self,
model: SingleByteCharSetModel,
is_reversed: bool = False,
name_prober: Optional[CharSetProber] = None,
) -> None:
super().__init__()
self._model = model
# TRUE if we need to reverse every pair in the model lookup
self._reversed = is_reversed
# Optional auxiliary prober for name decision
self._name_prober = name_prober
self._last_order = None
self._seq_counters = None
self._total_seqs = None
self._total_char = None
self._control_char = None
self._freq_char = None
self._last_order = 255
self._seq_counters: List[int] = []
self._total_seqs = 0
self._total_char = 0
self._control_char = 0
self._freq_char = 0
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
# char order of last character
self._last_order = 255
@ -78,18 +80,18 @@ class SingleByteCharSetProber(CharSetProber):
self._freq_char = 0
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
if self._name_prober:
return self._name_prober.charset_name
return self._model.charset_name
@property
def language(self):
def language(self) -> Optional[str]:
if self._name_prober:
return self._name_prober.language
return self._model.language
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# TODO: Make filter_international_words keep things in self.alphabet
if not self._model.keep_ascii_letters:
byte_str = self.filter_international_words(byte_str)
@ -139,7 +141,7 @@ class SingleByteCharSetProber(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
r = 0.01
if self._total_seqs > 0:
r = (

View File

@ -48,7 +48,7 @@ from .sbcharsetprober import SingleByteCharSetProber
class SBCSGroupProber(CharSetGroupProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
hebrew_prober = HebrewProber()
logical_hebrew_prober = SingleByteCharSetProber(

View File

@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Union
from .chardistribution import SJISDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@ -34,26 +36,29 @@ from .mbcssm import SJIS_SM_MODEL
class SJISProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
self.distribution_analyzer = SJISDistributionAnalysis()
self.context_analyzer = SJISContextAnalysis()
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.context_analyzer.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return self.context_analyzer.charset_name
@property
def language(self):
def language(self) -> str:
return "Japanese"
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
assert self.coding_sm is not None
assert self.distribution_analyzer is not None
for i, byte in enumerate(byte_str):
coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
@ -92,7 +97,9 @@ class SJISProber(MultiByteCharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
assert self.distribution_analyzer is not None
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)

View File

@ -39,12 +39,16 @@ class a user of ``chardet`` should use.
import codecs
import logging
import re
from typing import List, Optional, Union
from .charsetgroupprober import CharSetGroupProber
from .charsetprober import CharSetProber
from .enums import InputState, LanguageFilter, ProbingState
from .escprober import EscCharSetProber
from .latin1prober import Latin1Prober
from .macromanprober import MacRomanProber
from .mbcsgroupprober import MBCSGroupProber
from .resultdict import ResultDict
from .sbcsgroupprober import SBCSGroupProber
from .utf1632prober import UTF1632Prober
@ -80,34 +84,55 @@ class UniversalDetector:
"iso-8859-9": "Windows-1254",
"iso-8859-13": "Windows-1257",
}
# Based on https://encoding.spec.whatwg.org/#names-and-labels
# but altered to match Python names for encodings and remove mappings
# that break tests.
LEGACY_MAP = {
"ascii": "Windows-1252",
"iso-8859-1": "Windows-1252",
"tis-620": "ISO-8859-11",
"iso-8859-9": "Windows-1254",
"gb2312": "GB18030",
"euc-kr": "CP949",
"utf-16le": "UTF-16",
}
def __init__(self, lang_filter=LanguageFilter.ALL):
self._esc_charset_prober = None
self._utf1632_prober = None
self._charset_probers = []
self.result = None
self.done = None
self._got_data = None
self._input_state = None
self._last_char = None
def __init__(
self,
lang_filter: LanguageFilter = LanguageFilter.ALL,
should_rename_legacy: bool = False,
) -> None:
self._esc_charset_prober: Optional[EscCharSetProber] = None
self._utf1632_prober: Optional[UTF1632Prober] = None
self._charset_probers: List[CharSetProber] = []
self.result: ResultDict = {
"encoding": None,
"confidence": 0.0,
"language": None,
}
self.done = False
self._got_data = False
self._input_state = InputState.PURE_ASCII
self._last_char = b""
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
self._has_win_bytes = None
self._has_win_bytes = False
self.should_rename_legacy = should_rename_legacy
self.reset()
@property
def input_state(self):
def input_state(self) -> int:
return self._input_state
@property
def has_win_bytes(self):
def has_win_bytes(self) -> bool:
return self._has_win_bytes
@property
def charset_probers(self):
def charset_probers(self) -> List[CharSetProber]:
return self._charset_probers
def reset(self):
def reset(self) -> None:
"""
Reset the UniversalDetector and all of its probers back to their
initial states. This is called by ``__init__``, so you only need to
@ -126,7 +151,7 @@ class UniversalDetector:
for prober in self._charset_probers:
prober.reset()
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> None:
"""
Takes a chunk of a document and feeds it through all of the relevant
charset probers.
@ -166,6 +191,7 @@ class UniversalDetector:
elif byte_str.startswith(b"\xFE\xFF\x00\x00"):
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {
# TODO: This encoding is not supported by Python. Should remove?
"encoding": "X-ISO-10646-UCS-4-3412",
"confidence": 1.0,
"language": "",
@ -173,6 +199,7 @@ class UniversalDetector:
elif byte_str.startswith(b"\x00\x00\xFF\xFE"):
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
self.result = {
# TODO: This encoding is not supported by Python. Should remove?
"encoding": "X-ISO-10646-UCS-4-2143",
"confidence": 1.0,
"language": "",
@ -242,6 +269,7 @@ class UniversalDetector:
if self.lang_filter & LanguageFilter.NON_CJK:
self._charset_probers.append(SBCSGroupProber())
self._charset_probers.append(Latin1Prober())
self._charset_probers.append(MacRomanProber())
for prober in self._charset_probers:
if prober.feed(byte_str) == ProbingState.FOUND_IT:
self.result = {
@ -254,7 +282,7 @@ class UniversalDetector:
if self.WIN_BYTE_DETECTOR.search(byte_str):
self._has_win_bytes = True
def close(self):
def close(self) -> ResultDict:
"""
Stop analyzing the current document and come up with a final
prediction.
@ -288,7 +316,8 @@ class UniversalDetector:
max_prober = prober
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
charset_name = max_prober.charset_name
lower_charset_name = max_prober.charset_name.lower()
assert charset_name is not None
lower_charset_name = charset_name.lower()
confidence = max_prober.get_confidence()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
@ -297,6 +326,11 @@ class UniversalDetector:
charset_name = self.ISO_WIN_MAP.get(
lower_charset_name, charset_name
)
# Rename legacy encodings with superset encodings if asked
if self.should_rename_legacy:
charset_name = self.LEGACY_MAP.get(
(charset_name or "").lower(), charset_name
)
self.result = {
"encoding": charset_name,
"confidence": confidence,

View File

@ -18,6 +18,8 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
@ -36,7 +38,7 @@ class UTF1632Prober(CharSetProber):
# a fixed constant ratio of expected zeros or non-zeros in modulo-position.
EXPECTED_RATIO = 0.94
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.position = 0
self.zeros_at_mod = [0] * 4
@ -51,7 +53,7 @@ class UTF1632Prober(CharSetProber):
self.first_half_surrogate_pair_detected_16le = False
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.position = 0
self.zeros_at_mod = [0] * 4
@ -66,7 +68,7 @@ class UTF1632Prober(CharSetProber):
self.quad = [0, 0, 0, 0]
@property
def charset_name(self):
def charset_name(self) -> str:
if self.is_likely_utf32be():
return "utf-32be"
if self.is_likely_utf32le():
@ -79,16 +81,16 @@ class UTF1632Prober(CharSetProber):
return "utf-16"
@property
def language(self):
def language(self) -> str:
return ""
def approx_32bit_chars(self):
def approx_32bit_chars(self) -> float:
return max(1.0, self.position / 4.0)
def approx_16bit_chars(self):
def approx_16bit_chars(self) -> float:
return max(1.0, self.position / 2.0)
def is_likely_utf32be(self):
def is_likely_utf32be(self) -> bool:
approx_chars = self.approx_32bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
@ -98,7 +100,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf32be
)
def is_likely_utf32le(self):
def is_likely_utf32le(self) -> bool:
approx_chars = self.approx_32bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
@ -108,7 +110,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf32le
)
def is_likely_utf16be(self):
def is_likely_utf16be(self) -> bool:
approx_chars = self.approx_16bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
(self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars
@ -118,7 +120,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf16be
)
def is_likely_utf16le(self):
def is_likely_utf16le(self) -> bool:
approx_chars = self.approx_16bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
(self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars
@ -128,7 +130,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf16le
)
def validate_utf32_characters(self, quad):
def validate_utf32_characters(self, quad: List[int]) -> None:
"""
Validate if the quad of bytes is valid UTF-32.
@ -150,7 +152,7 @@ class UTF1632Prober(CharSetProber):
):
self.invalid_utf32le = True
def validate_utf16_characters(self, pair):
def validate_utf16_characters(self, pair: List[int]) -> None:
"""
Validate if the pair of bytes is valid UTF-16.
@ -182,7 +184,7 @@ class UTF1632Prober(CharSetProber):
else:
self.invalid_utf16le = True
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
mod4 = self.position % 4
self.quad[mod4] = c
@ -198,7 +200,7 @@ class UTF1632Prober(CharSetProber):
return self.state
@property
def state(self):
def state(self) -> ProbingState:
if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}:
# terminal, decided states
return self._state
@ -210,7 +212,7 @@ class UTF1632Prober(CharSetProber):
self._state = ProbingState.NOT_ME
return self._state
def get_confidence(self):
def get_confidence(self) -> float:
return (
0.85
if (

View File

@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Union
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@ -34,26 +36,26 @@ from .mbcssm import UTF8_SM_MODEL
class UTF8Prober(CharSetProber):
ONE_CHAR_PROB = 0.5
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
self._num_mb_chars = None
self._num_mb_chars = 0
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.coding_sm.reset()
self._num_mb_chars = 0
@property
def charset_name(self):
def charset_name(self) -> str:
return "utf-8"
@property
def language(self):
def language(self) -> str:
return ""
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
coding_state = self.coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
@ -72,7 +74,7 @@ class UTF8Prober(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
unlike = 0.99
if self._num_mb_chars < 6:
unlike *= self.ONE_CHAR_PROB**self._num_mb_chars

View File

@ -1,9 +1,9 @@
"""
This module exists only to simplify retrieving the version number of chardet
from within setup.py and from chardet subpackages.
from within setuptools and from chardet subpackages.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
__version__ = "5.0.0"
__version__ = "5.1.0"
VERSION = __version__.split(".")

View File

@ -9,7 +9,7 @@ pyparsing==3.0.9
pyproject-hooks==1.0.0
requests==2.28.2
certifi==2022.12.7
chardet==5.0.0
chardet==5.1.0
idna==3.4
urllib3==1.26.12
rich==12.6.0