tests attempt 2
This commit is contained in:
parent fdf385fe06
commit c88f7df83a
2363 changed files with 408191 additions and 0 deletions
207  venv/lib/python3.11/site-packages/coverage/phystokens.py  Normal file
@@ -0,0 +1,207 @@
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""Better tokenizing for coverage.py."""

from __future__ import annotations

import ast
import io
import keyword
import re
import sys
import token
import tokenize

from typing import Iterable, List, Optional, Set, Tuple

from coverage import env
from coverage.types import TLineNo, TSourceTokenLines


TokenInfos = Iterable[tokenize.TokenInfo]


def _phys_tokens(toks: TokenInfos) -> TokenInfos:
    """Return all physical tokens, even line continuations.

    tokenize.generate_tokens() doesn't return a token for the backslash that
    continues lines. This wrapper provides those tokens so that we can
    re-create a faithful representation of the original source.

    Returns the same values as generate_tokens()

    """
    last_line: Optional[str] = None
    last_lineno = -1
    last_ttext: str = ""
    for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
        if last_lineno != elineno:
            if last_line and last_line.endswith("\\\n"):
                # We are at the beginning of a new line, and the last line
                # ended with a backslash. We probably have to inject a
                # backslash token into the stream. Unfortunately, there's more
                # to figure out. This code::
                #
                #   usage = """\
                #   HEY THERE
                #   """
                #
                # triggers this condition, but the token text is::
                #
                #   '"""\\\nHEY THERE\n"""'
                #
                # so we need to figure out if the backslash is already in the
                # string token or not.
                inject_backslash = True
                if last_ttext.endswith("\\"):
                    inject_backslash = False
                elif ttype == token.STRING:
                    if "\n" in ttext and ttext.split("\n", 1)[0][-1] == "\\":
                        # It's a multi-line string and the first line ends with
                        # a backslash, so we don't need to inject another.
                        inject_backslash = False
                if inject_backslash:
                    # Figure out what column the backslash is in.
                    ccol = len(last_line.split("\n")[-2]) - 1
                    # Yield the token, with a fake token type.
                    yield tokenize.TokenInfo(
                        99999, "\\\n",
                        (slineno, ccol), (slineno, ccol+2),
                        last_line
                    )
        last_line = ltext
        if ttype not in (tokenize.NEWLINE, tokenize.NL):
            last_ttext = ttext
        yield tokenize.TokenInfo(ttype, ttext, (slineno, scol), (elineno, ecol), ltext)
        last_lineno = elineno


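For reference, a minimal sketch (not part of the file above, assuming coverage.py is importable) of the gap this wrapper fills: the standard tokenizer yields no token for a backslash line continuation, while the internal helper _phys_tokens injects one.

    import io
    import tokenize

    from coverage.phystokens import _phys_tokens

    SRC = "x = 1 + \\\n    2\n"
    toks = list(tokenize.generate_tokens(io.StringIO(SRC).readline))
    # tokenize itself never emits the continuation backslash...
    print(any(t.string == "\\\n" for t in toks))                      # False
    # ...but the wrapper injects a fake token for it.
    print(any(t.string == "\\\n" for t in _phys_tokens(iter(toks))))  # True
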
class MatchCaseFinder(ast.NodeVisitor):
    """Helper for finding match/case lines."""
    def __init__(self, source: str) -> None:
        # This will be the set of line numbers that start match or case statements.
        self.match_case_lines: Set[TLineNo] = set()
        self.visit(ast.parse(source))

    if sys.version_info >= (3, 10):
        def visit_Match(self, node: ast.Match) -> None:
            """Invoked by ast.NodeVisitor.visit"""
            self.match_case_lines.add(node.lineno)
            for case in node.cases:
                self.match_case_lines.add(case.pattern.lineno)
            self.generic_visit(node)


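A small sketch (not part of the file above, Python 3.10+ only) of what the visitor collects:

    from coverage.phystokens import MatchCaseFinder

    SRC = "match command:\n    case 'go':\n        pass\n"
    # Line 1 starts the match statement, line 2 starts a case pattern.
    print(sorted(MatchCaseFinder(SRC).match_case_lines))   # [1, 2]
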
def source_token_lines(source: str) -> TSourceTokenLines:
    """Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with newlines,
    you should have your original `source` back, with two differences:
    trailing white space is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    """

    ws_tokens = {token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL}
    line: List[Tuple[str, str]] = []
    col = 0

    source = source.expandtabs(8).replace("\r\n", "\n")
    tokgen = generate_tokens(source)

    if env.PYBEHAVIOR.soft_keywords:
        match_case_lines = MatchCaseFinder(source).match_case_lines

    for ttype, ttext, (sline, scol), (_, ecol), _ in _phys_tokens(tokgen):
        mark_start = True
        for part in re.split("(\n)", ttext):
            if part == "\n":
                yield line
                line = []
                col = 0
                mark_end = False
            elif part == "":
                mark_end = False
            elif ttype in ws_tokens:
                mark_end = False
            else:
                if mark_start and scol > col:
                    line.append(("ws", " " * (scol - col)))
                    mark_start = False
                tok_class = tokenize.tok_name.get(ttype, "xx").lower()[:3]
                if ttype == token.NAME:
                    if keyword.iskeyword(ttext):
                        # Hard keywords are always keywords.
                        tok_class = "key"
                    elif sys.version_info >= (3, 10):   # PYVERSIONS
                        # Need the version_info check to keep mypy from borking
                        # on issoftkeyword here.
                        if env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
                            # Soft keywords appear at the start of the line,
                            # on lines that start match or case statements.
                            if len(line) == 0:
                                is_start_of_line = True
                            elif (len(line) == 1) and line[0][0] == "ws":
                                is_start_of_line = True
                            else:
                                is_start_of_line = False
                            if is_start_of_line and sline in match_case_lines:
                                tok_class = "key"
                line.append((tok_class, part))
                mark_end = True
            scol = 0
        if mark_end:
            col = ecol

    if line:
        yield line


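A minimal usage sketch (not part of the file above, assuming coverage.py is importable) showing the (class, text) pairs described in the docstring:

    from coverage.phystokens import source_token_lines

    SRC = "def hello():\n    return 'hi'\n"
    for tok_line in source_token_lines(SRC):
        print(tok_line)
    # [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ('op', ')'), ('op', ':')]
    # [('ws', '    '), ('key', 'return'), ('ws', ' '), ('str', "'hi'")]
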
class CachedTokenizer:
    """A one-element cache around tokenize.generate_tokens.

    When reporting, coverage.py tokenizes files twice, once to find the
    structure of the file, and once to syntax-color it. Tokenizing is
    expensive, and easily cached.

    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
    actually tokenize twice.

    """
    def __init__(self) -> None:
        self.last_text: Optional[str] = None
        self.last_tokens: List[tokenize.TokenInfo] = []

    def generate_tokens(self, text: str) -> TokenInfos:
        """A stand-in for `tokenize.generate_tokens`."""
        if text != self.last_text:
            self.last_text = text
            readline = io.StringIO(text).readline
            try:
                self.last_tokens = list(tokenize.generate_tokens(readline))
            except:
                self.last_text = None
                raise
        return self.last_tokens

# Create our generate_tokens cache as a callable replacement function.
generate_tokens = CachedTokenizer().generate_tokens


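A quick sketch (not part of the file above) of the caching behavior: back-to-back calls with identical text return the same token list object.

    from coverage.phystokens import generate_tokens

    TEXT = "a = 1\n"
    first = generate_tokens(TEXT)
    second = generate_tokens(TEXT)
    print(first is second)                      # True: repeated text hits the one-element cache
    print(first is generate_tokens("b = 2\n"))  # False: new text is tokenized afresh
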
def source_encoding(source: bytes) -> str:
    """Determine the encoding for `source`, according to PEP 263.

    `source` is a byte string: the text of the program.

    Returns a string, the name of the encoding.

    """
    readline = iter(source.splitlines(True)).__next__
    return tokenize.detect_encoding(readline)[0]
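A short sketch (not part of the file above) of the PEP 263 detection: an explicit coding cookie is honored, and UTF-8 is the default when no cookie is present.

    from coverage.phystokens import source_encoding

    print(source_encoding(b"# -*- coding: iso-8859-1 -*-\nx = 1\n"))   # iso-8859-1
    print(source_encoding(b"x = 1\n"))                                 # utf-8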