forked from bton/matekasse

tests attempt 2

2000-Trek 2023-07-28 23:30:45 +02:00
parent fdf385fe06
commit c88f7df83a
2363 changed files with 408191 additions and 0 deletions

@@ -0,0 +1,15 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "23.1"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = "2014-2019 %s" % __author__

@@ -0,0 +1,108 @@
"""
ELF file parser.
This provides a class ``ELFFile`` that parses an ELF executable with an
interface similar to ``ZipFile``. Only the read interface is implemented.
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
import enum
import os
import struct
from typing import IO, Optional, Tuple
class ELFInvalid(ValueError):
pass
class EIClass(enum.IntEnum):
C32 = 1
C64 = 2
class EIData(enum.IntEnum):
Lsb = 1
Msb = 2
class EMachine(enum.IntEnum):
I386 = 3
S390 = 22
Arm = 40
X8664 = 62
AArc64 = 183
class ELFFile:
"""
Representation of an ELF executable.
"""
def __init__(self, f: IO[bytes]) -> None:
self._f = f
try:
ident = self._read("16B")
except struct.error:
raise ELFInvalid("unable to parse identification")
magic = bytes(ident[:4])
if magic != b"\x7fELF":
raise ELFInvalid(f"invalid magic: {magic!r}")
self.capacity = ident[4] # Format for program header (bitness).
self.encoding = ident[5] # Data structure encoding (endianness).
try:
# e_fmt: Format for the remainder of the ELF header.
# p_fmt: Format for each program header entry.
# p_idx: Indexes to find p_type, p_offset, and p_filesz.
e_fmt, self._p_fmt, self._p_idx = {
(1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
(1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
(2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
(2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
}[(self.capacity, self.encoding)]
except KeyError:
raise ELFInvalid(
f"unrecognized capacity ({self.capacity}) or "
f"encoding ({self.encoding})"
)
try:
(
_,
self.machine, # Architecture type.
_,
_,
self._e_phoff, # Offset of program header.
_,
self.flags, # Processor-specific flags.
_,
self._e_phentsize, # Size of each program header entry.
self._e_phnum, # Number of program header entries.
) = self._read(e_fmt)
except struct.error as e:
raise ELFInvalid("unable to parse machine and section information") from e
def _read(self, fmt: str) -> Tuple[int, ...]:
return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
@property
def interpreter(self) -> Optional[str]:
"""
The path recorded in the ``PT_INTERP`` section header.
"""
for index in range(self._e_phnum):
self._f.seek(self._e_phoff + self._e_phentsize * index)
try:
data = self._read(self._p_fmt)
except struct.error:
continue
if data[self._p_idx[0]] != 3: # Not PT_INTERP.
continue
self._f.seek(data[self._p_idx[1]])
return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
return None
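# --- Editor's usage sketch (illustrative; not part of the original module) ---
# ELFFile mirrors the read-only side of ``ZipFile``: wrap an open binary file,
# then inspect the header fields and the ``PT_INTERP`` path.
def _demo_elffile(path: str) -> None:
    with open(path, "rb") as f:
        try:
            elf = ELFFile(f)
        except ELFInvalid as exc:
            print("not a valid ELF file:", exc)
            return
        # e.g. 62 (EMachine.X8664) and /lib64/ld-linux-x86-64.so.2 on x86-64 Linux.
        print("machine:", elf.machine)
        print("interpreter:", elf.interpreter)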

@@ -0,0 +1,240 @@
import collections
import contextlib
import functools
import os
import re
import sys
import warnings
from typing import Dict, Generator, Iterator, NamedTuple, Optional, Tuple
from ._elffile import EIClass, EIData, ELFFile, EMachine
EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
try:
with open(path, "rb") as f:
yield ELFFile(f)
except (OSError, TypeError, ValueError):
yield None
def _is_linux_armhf(executable: str) -> bool:
# hard-float ABI can be detected from the ELF header of the running
# process
# https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
with _parse_elf(executable) as f:
return (
f is not None
and f.capacity == EIClass.C32
and f.encoding == EIData.Lsb
and f.machine == EMachine.Arm
and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
)
def _is_linux_i686(executable: str) -> bool:
with _parse_elf(executable) as f:
return (
f is not None
and f.capacity == EIClass.C32
and f.encoding == EIData.Lsb
and f.machine == EMachine.I386
)
def _have_compatible_abi(executable: str, arch: str) -> bool:
if arch == "armv7l":
return _is_linux_armhf(executable)
if arch == "i686":
return _is_linux_i686(executable)
return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}
# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
class _GLibCVersion(NamedTuple):
major: int
minor: int
def _glibc_version_string_confstr() -> Optional[str]:
"""
Primary implementation of glibc_version_string using os.confstr.
"""
# os.confstr is quite a bit faster than ctypes.CDLL. It's also less likely
# to be broken or missing. This strategy is used in the standard library
# platform module.
# https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
try:
# Should be a string like "glibc 2.17".
version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION")
assert version_string is not None
_, version = version_string.rsplit()
except (AssertionError, AttributeError, OSError, ValueError):
# os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
return None
return version
def _glibc_version_string_ctypes() -> Optional[str]:
"""
Fallback implementation of glibc_version_string using ctypes.
"""
try:
import ctypes
except ImportError:
return None
# ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
# manpage says, "If filename is NULL, then the returned handle is for the
# main program". This way we can let the linker do the work to figure out
# which libc our process is actually using.
#
# We must also handle the special case where the executable is not a
# dynamically linked executable. This can occur when using musl libc,
# for example. In this situation, dlopen() will error, leading to an
# OSError. Interestingly, at least in the case of musl, there is no
# errno set on the OSError. The single string argument used to construct
# OSError comes from libc itself and is therefore not portable to
# hard code here. In any case, failure to call dlopen() means we
can't proceed, so we bail on our attempt.
try:
process_namespace = ctypes.CDLL(None)
except OSError:
return None
try:
gnu_get_libc_version = process_namespace.gnu_get_libc_version
except AttributeError:
# Symbol doesn't exist -> therefore, we are not linked to
# glibc.
return None
# Call gnu_get_libc_version, which returns a string like "2.5"
gnu_get_libc_version.restype = ctypes.c_char_p
version_str: str = gnu_get_libc_version()
# py2 / py3 compatibility:
if not isinstance(version_str, str):
version_str = version_str.decode("ascii")
return version_str
def _glibc_version_string() -> Optional[str]:
"""Returns glibc version string, or None if not using glibc."""
return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
"""Parse glibc version.
We use a regexp instead of str.split because we want to discard any
random junk that might come after the minor version -- this might happen
in patched/forked versions of glibc (e.g. Linaro's version of glibc
uses version strings like "2.20-2014.11"). See gh-3588.
"""
m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
if not m:
warnings.warn(
f"Expected glibc version with 2 components major.minor,"
f" got: {version_str}",
RuntimeWarning,
)
return -1, -1
return int(m.group("major")), int(m.group("minor"))
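# Editor's illustration (not part of the original module): the regexp keeps
# only the leading major.minor pair, so Linaro-style strings parse cleanly.
def _demo_parse_glibc_version() -> None:
    assert _parse_glibc_version("2.17") == (2, 17)
    assert _parse_glibc_version("2.20-2014.11") == (2, 20)  # vendor suffix discarded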
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
version_str = _glibc_version_string()
if version_str is None:
return (-1, -1)
return _parse_glibc_version(version_str)
# From PEP 513, PEP 600
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
sys_glibc = _get_glibc_version()
if sys_glibc < version:
return False
# Check for presence of _manylinux module.
try:
import _manylinux # noqa
except ImportError:
return True
if hasattr(_manylinux, "manylinux_compatible"):
result = _manylinux.manylinux_compatible(version[0], version[1], arch)
if result is not None:
return bool(result)
return True
if version == _GLibCVersion(2, 5):
if hasattr(_manylinux, "manylinux1_compatible"):
return bool(_manylinux.manylinux1_compatible)
if version == _GLibCVersion(2, 12):
if hasattr(_manylinux, "manylinux2010_compatible"):
return bool(_manylinux.manylinux2010_compatible)
if version == _GLibCVersion(2, 17):
if hasattr(_manylinux, "manylinux2014_compatible"):
return bool(_manylinux.manylinux2014_compatible)
return True
_LEGACY_MANYLINUX_MAP = {
# CentOS 7 w/ glibc 2.17 (PEP 599)
(2, 17): "manylinux2014",
# CentOS 6 w/ glibc 2.12 (PEP 571)
(2, 12): "manylinux2010",
# CentOS 5 w/ glibc 2.5 (PEP 513)
(2, 5): "manylinux1",
}
def platform_tags(linux: str, arch: str) -> Iterator[str]:
if not _have_compatible_abi(sys.executable, arch):
return
# Oldest glibc to be supported regardless of architecture is (2, 17).
too_old_glibc2 = _GLibCVersion(2, 16)
if arch in {"x86_64", "i686"}:
# On x86/i686 also oldest glibc to be supported is (2, 5).
too_old_glibc2 = _GLibCVersion(2, 4)
current_glibc = _GLibCVersion(*_get_glibc_version())
glibc_max_list = [current_glibc]
# We can assume compatibility across glibc major versions.
# https://sourceware.org/bugzilla/show_bug.cgi?id=24636
#
# Build a list of maximum glibc versions so that we can
# output the canonical list of all glibc from current_glibc
# down to too_old_glibc2, including all intermediary versions.
for glibc_major in range(current_glibc.major - 1, 1, -1):
glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
for glibc_max in glibc_max_list:
if glibc_max.major == too_old_glibc2.major:
min_minor = too_old_glibc2.minor
else:
# For other glibc major versions oldest supported is (x, 0).
min_minor = -1
for glibc_minor in range(glibc_max.minor, min_minor, -1):
glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
tag = "manylinux_{}_{}".format(*glibc_version)
if _is_compatible(tag, arch, glibc_version):
yield linux.replace("linux", tag)
# Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
if glibc_version in _LEGACY_MANYLINUX_MAP:
legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
if _is_compatible(legacy_tag, arch, glibc_version):
yield linux.replace("linux", legacy_tag)
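# --- Editor's usage sketch (illustrative; not part of the original module) ---
# On a glibc-2.31 x86_64 system this yields manylinux_2_31_x86_64 down to
# manylinux_2_5_x86_64, with the legacy aliases manylinux2014, manylinux2010,
# and manylinux1 emitted right after the glibc 2.17, 2.12, and 2.5 tags.
def _demo_manylinux_tags() -> None:
    for tag in platform_tags("linux_x86_64", "x86_64"):
        print(tag)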

@@ -0,0 +1,80 @@
"""PEP 656 support.
This module implements logic to detect if the currently running Python is
linked against musl, and what musl version is used.
"""
import functools
import re
import subprocess
import sys
from typing import Iterator, NamedTuple, Optional
from ._elffile import ELFFile
class _MuslVersion(NamedTuple):
major: int
minor: int
def _parse_musl_version(output: str) -> Optional[_MuslVersion]:
lines = [n for n in (n.strip() for n in output.splitlines()) if n]
if len(lines) < 2 or lines[0][:4] != "musl":
return None
m = re.match(r"Version (\d+)\.(\d+)", lines[1])
if not m:
return None
return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
@functools.lru_cache()
def _get_musl_version(executable: str) -> Optional[_MuslVersion]:
"""Detect currently-running musl runtime version.
This is done by checking the specified executable's dynamic linking
information, and invoking the loader to parse its output for a version
string. If the loader is musl, the output would be something like::
musl libc (x86_64)
Version 1.2.2
Dynamic Program Loader
"""
try:
with open(executable, "rb") as f:
ld = ELFFile(f).interpreter
except (OSError, TypeError, ValueError):
return None
if ld is None or "musl" not in ld:
return None
proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True)
return _parse_musl_version(proc.stderr)
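# Editor's illustration (not part of the original module): feeding the loader
# output quoted in the docstring above through the parser.
def _demo_parse_musl_version() -> None:
    output = "musl libc (x86_64)\nVersion 1.2.2\nDynamic Program Loader"
    assert _parse_musl_version(output) == _MuslVersion(major=1, minor=2)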
def platform_tags(arch: str) -> Iterator[str]:
"""Generate musllinux tags compatible to the current platform.
:param arch: Should be the part of platform tag after the ``linux_``
prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a
prerequisite for the current platform to be musllinux-compatible.
:returns: An iterator of compatible musllinux tags.
"""
sys_musl = _get_musl_version(sys.executable)
if sys_musl is None: # Python not dynamically linked against musl.
return
for minor in range(sys_musl.minor, -1, -1):
yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
if __name__ == "__main__": # pragma: no cover
import sysconfig
plat = sysconfig.get_platform()
assert plat.startswith("linux-"), "not linux"
print("plat:", plat)
print("musl:", _get_musl_version(sys.executable))
print("tags:", end=" ")
for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])):
print(t, end="\n ")

@@ -0,0 +1,353 @@
"""Handwritten parser of dependency specifiers.
The docstring for each __parse_* function contains EBNF-inspired grammar representing
the implementation.
"""
import ast
from typing import Any, List, NamedTuple, Optional, Tuple, Union
from ._tokenizer import DEFAULT_RULES, Tokenizer
class Node:
def __init__(self, value: str) -> None:
self.value = value
def __str__(self) -> str:
return self.value
def __repr__(self) -> str:
return f"<{self.__class__.__name__}('{self}')>"
def serialize(self) -> str:
raise NotImplementedError
class Variable(Node):
def serialize(self) -> str:
return str(self)
class Value(Node):
def serialize(self) -> str:
return f'"{self}"'
class Op(Node):
def serialize(self) -> str:
return str(self)
MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]
class ParsedRequirement(NamedTuple):
name: str
url: str
extras: List[str]
specifier: str
marker: Optional[MarkerList]
# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
"""
requirement = WS? IDENTIFIER WS? extras WS? requirement_details
"""
tokenizer.consume("WS")
name_token = tokenizer.expect(
"IDENTIFIER", expected="package name at the start of dependency specifier"
)
name = name_token.text
tokenizer.consume("WS")
extras = _parse_extras(tokenizer)
tokenizer.consume("WS")
url, specifier, marker = _parse_requirement_details(tokenizer)
tokenizer.expect("END", expected="end of dependency specifier")
return ParsedRequirement(name, url, extras, specifier, marker)
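# Editor's usage sketch (illustrative; not part of the original module): a
# complete dependency specifier decomposes into the ParsedRequirement fields.
def _demo_parse_requirement() -> None:
    parsed = parse_requirement("example[extra1] >=1.0 ; python_version < '3.8'")
    print(parsed.name)       # example
    print(parsed.extras)     # ['extra1']
    print(parsed.specifier)  # >=1.0
    print(parsed.marker)     # nested marker structure, or None if absent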
def _parse_requirement_details(
tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
"""
requirement_details = AT URL (WS requirement_marker?)?
| specifier WS? (requirement_marker)?
"""
specifier = ""
url = ""
marker = None
if tokenizer.check("AT"):
tokenizer.read()
tokenizer.consume("WS")
url_start = tokenizer.position
url = tokenizer.expect("URL", expected="URL after @").text
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
tokenizer.expect("WS", expected="whitespace after URL")
# The input might end after whitespace.
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
marker = _parse_requirement_marker(
tokenizer, span_start=url_start, after="URL and whitespace"
)
else:
specifier_start = tokenizer.position
specifier = _parse_specifier(tokenizer)
tokenizer.consume("WS")
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
marker = _parse_requirement_marker(
tokenizer,
span_start=specifier_start,
after=(
"version specifier"
if specifier
else "name and no valid version specifier"
),
)
return (url, specifier, marker)
def _parse_requirement_marker(
tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
"""
requirement_marker = SEMICOLON marker WS?
"""
if not tokenizer.check("SEMICOLON"):
tokenizer.raise_syntax_error(
f"Expected end or semicolon (after {after})",
span_start=span_start,
)
tokenizer.read()
marker = _parse_marker(tokenizer)
tokenizer.consume("WS")
return marker
def _parse_extras(tokenizer: Tokenizer) -> List[str]:
"""
extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
"""
if not tokenizer.check("LEFT_BRACKET", peek=True):
return []
with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS")
return extras
def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
"""
extras_list = identifier (wsp* ',' wsp* identifier)*
"""
extras: List[str] = []
if not tokenizer.check("IDENTIFIER"):
return extras
extras.append(tokenizer.read().text)
while True:
tokenizer.consume("WS")
if tokenizer.check("IDENTIFIER", peek=True):
tokenizer.raise_syntax_error("Expected comma between extra names")
elif not tokenizer.check("COMMA"):
break
tokenizer.read()
tokenizer.consume("WS")
extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
extras.append(extra_token.text)
return extras
def _parse_specifier(tokenizer: Tokenizer) -> str:
"""
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS?
"""
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS")
return parsed_specifiers
def _parse_version_many(tokenizer: Tokenizer) -> str:
"""
version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
"""
parsed_specifiers = ""
while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS")
if not tokenizer.check("COMMA"):
break
parsed_specifiers += tokenizer.read().text
tokenizer.consume("WS")
return parsed_specifiers
# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
return _parse_marker(Tokenizer(source, rules=DEFAULT_RULES))
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
"""
marker = marker_atom (BOOLOP marker_atom)+
"""
expression = [_parse_marker_atom(tokenizer)]
while tokenizer.check("BOOLOP"):
token = tokenizer.read()
expr_right = _parse_marker_atom(tokenizer)
expression.extend((token.text, expr_right))
return expression
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
"""
marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
| WS? marker_item WS?
"""
tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS")
else:
marker = _parse_marker_item(tokenizer)
tokenizer.consume("WS")
return marker
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
"""
marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
"""
tokenizer.consume("WS")
marker_var_left = _parse_marker_var(tokenizer)
tokenizer.consume("WS")
marker_op = _parse_marker_op(tokenizer)
tokenizer.consume("WS")
marker_var_right = _parse_marker_var(tokenizer)
tokenizer.consume("WS")
return (marker_var_left, marker_op, marker_var_right)
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
"""
marker_var = VARIABLE | QUOTED_STRING
"""
if tokenizer.check("VARIABLE"):
return process_env_var(tokenizer.read().text.replace(".", "_"))
elif tokenizer.check("QUOTED_STRING"):
return process_python_str(tokenizer.read().text)
else:
tokenizer.raise_syntax_error(
message="Expected a marker variable or quoted string"
)
def process_env_var(env_var: str) -> Variable:
if (
env_var == "platform_python_implementation"
or env_var == "python_implementation"
):
return Variable("platform_python_implementation")
else:
return Variable(env_var)
def process_python_str(python_str: str) -> Value:
value = ast.literal_eval(python_str)
return Value(str(value))
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
"""
marker_op = IN | NOT IN | OP
"""
if tokenizer.check("IN"):
tokenizer.read()
return Op("in")
elif tokenizer.check("NOT"):
tokenizer.read()
tokenizer.expect("WS", expected="whitespace after 'not'")
tokenizer.expect("IN", expected="'in' after 'not'")
return Op("not in")
elif tokenizer.check("OP"):
return Op(tokenizer.read().text)
else:
return tokenizer.raise_syntax_error(
"Expected marker operator, one of "
"<=, <, !=, ==, >=, >, ~=, ===, in, not in"
)

@@ -0,0 +1,61 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
class InfinityType:
def __repr__(self) -> str:
return "Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return False
def __le__(self, other: object) -> bool:
return False
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return True
def __ge__(self, other: object) -> bool:
return True
def __neg__(self: object) -> "NegativeInfinityType":
return NegativeInfinity
Infinity = InfinityType()
class NegativeInfinityType:
def __repr__(self) -> str:
return "-Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return True
def __le__(self, other: object) -> bool:
return True
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return False
def __ge__(self, other: object) -> bool:
return False
def __neg__(self: object) -> InfinityType:
return Infinity
NegativeInfinity = NegativeInfinityType()
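# Editor's illustration (not part of the original module): the two singletons
# are comparison sentinels that order before or after any other value, which
# version-sorting keys rely on.
def _demo_structures() -> None:
    assert NegativeInfinity < 0 < Infinity
    assert sorted([Infinity, 5, NegativeInfinity])[0] is NegativeInfinity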

@@ -0,0 +1,192 @@
import contextlib
import re
from dataclasses import dataclass
from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union
from .specifiers import Specifier
@dataclass
class Token:
name: str
text: str
position: int
class ParserSyntaxError(Exception):
"""The provided source text could not be parsed correctly."""
def __init__(
self,
message: str,
*,
source: str,
span: Tuple[int, int],
) -> None:
self.span = span
self.message = message
self.source = source
super().__init__()
def __str__(self) -> str:
marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
return "\n ".join([self.message, self.source, marker])
DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
"LEFT_PARENTHESIS": r"\(",
"RIGHT_PARENTHESIS": r"\)",
"LEFT_BRACKET": r"\[",
"RIGHT_BRACKET": r"\]",
"SEMICOLON": r";",
"COMMA": r",",
"QUOTED_STRING": re.compile(
r"""
(
('[^']*')
|
("[^"]*")
)
""",
re.VERBOSE,
),
"OP": r"(===|==|~=|!=|<=|>=|<|>)",
"BOOLOP": r"\b(or|and)\b",
"IN": r"\bin\b",
"NOT": r"\bnot\b",
"VARIABLE": re.compile(
r"""
\b(
python_version
|python_full_version
|os[._]name
|sys[._]platform
|platform_(release|system)
|platform[._](version|machine|python_implementation)
|python_implementation
|implementation_(name|version)
|extra
)\b
""",
re.VERBOSE,
),
"SPECIFIER": re.compile(
Specifier._operator_regex_str + Specifier._version_regex_str,
re.VERBOSE | re.IGNORECASE,
),
"AT": r"\@",
"URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+",
"END": r"$",
}
class Tokenizer:
"""Context-sensitive token parsing.
Provides methods to examine the input stream to check whether the next token
matches.
"""
def __init__(
self,
source: str,
*,
rules: "Dict[str, Union[str, re.Pattern[str]]]",
) -> None:
self.source = source
self.rules: Dict[str, re.Pattern[str]] = {
name: re.compile(pattern) for name, pattern in rules.items()
}
self.next_token: Optional[Token] = None
self.position = 0
def consume(self, name: str) -> None:
"""Move beyond provided token name, if at current position."""
if self.check(name):
self.read()
def check(self, name: str, *, peek: bool = False) -> bool:
"""Check whether the next token has the provided name.
By default, if the check succeeds, the token *must* be read before
another check. If `peek` is set to `True`, the token is not loaded and
would need to be checked again.
"""
assert (
self.next_token is None
), f"Cannot check for {name!r}, already have {self.next_token!r}"
assert name in self.rules, f"Unknown token name: {name!r}"
expression = self.rules[name]
match = expression.match(self.source, self.position)
if match is None:
return False
if not peek:
self.next_token = Token(name, match[0], self.position)
return True
def expect(self, name: str, *, expected: str) -> Token:
"""Expect a certain token name next, failing with a syntax error otherwise.
The token is *not* read.
"""
if not self.check(name):
raise self.raise_syntax_error(f"Expected {expected}")
return self.read()
def read(self) -> Token:
"""Consume the next token and return it."""
token = self.next_token
assert token is not None
self.position += len(token.text)
self.next_token = None
return token
def raise_syntax_error(
self,
message: str,
*,
span_start: Optional[int] = None,
span_end: Optional[int] = None,
) -> NoReturn:
"""Raise ParserSyntaxError at the given position."""
span = (
self.position if span_start is None else span_start,
self.position if span_end is None else span_end,
)
raise ParserSyntaxError(
message,
source=self.source,
span=span,
)
@contextlib.contextmanager
def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token):
open_position = self.position
self.read()
else:
open_position = None
yield
if open_position is None:
return
if not self.check(close_token):
self.raise_syntax_error(
f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position,
)
self.read()
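# --- Editor's usage sketch (illustrative; not part of the original module) ---
# Driving the tokenizer by hand over a tiny source with DEFAULT_RULES:
# check() loads the next token, read() consumes it and advances the position.
def _demo_tokenizer() -> None:
    tokenizer = Tokenizer("example >=1.0", rules=DEFAULT_RULES)
    name = tokenizer.expect("IDENTIFIER", expected="package name")
    tokenizer.consume("WS")
    if tokenizer.check("SPECIFIER"):
        print(name.text, tokenizer.read().text)  # example >=1.0
    tokenizer.expect("END", expected="end of input")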

@@ -0,0 +1,252 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import operator
import os
import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from ._parser import (
MarkerAtom,
MarkerList,
Op,
Value,
Variable,
parse_marker as _parse_marker,
)
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
__all__ = [
"InvalidMarker",
"UndefinedComparison",
"UndefinedEnvironmentName",
"Marker",
"default_environment",
]
Operator = Callable[[str, str], bool]
class InvalidMarker(ValueError):
"""
An invalid marker was found; users should refer to PEP 508.
"""
class UndefinedComparison(ValueError):
"""
An invalid operation was attempted on a value that doesn't support it.
"""
class UndefinedEnvironmentName(ValueError):
"""
A name was used that does not exist inside of the
environment.
"""
def _normalize_extra_values(results: Any) -> Any:
"""
Normalize extra values.
"""
if isinstance(results[0], tuple):
lhs, op, rhs = results[0]
if isinstance(lhs, Variable) and lhs.value == "extra":
normalized_extra = canonicalize_name(rhs.value)
rhs = Value(normalized_extra)
elif isinstance(rhs, Variable) and rhs.value == "extra":
normalized_extra = canonicalize_name(lhs.value)
lhs = Value(normalized_extra)
results[0] = lhs, op, rhs
return results
def _format_marker(
marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True
) -> str:
assert isinstance(marker, (list, tuple, str))
# Sometimes we have a structure like [[...]] which is a single item list
# where the single item is itself its own list. In that case we want to skip
# the rest of this function so that we don't get extraneous () on the
# outside.
if (
isinstance(marker, list)
and len(marker) == 1
and isinstance(marker[0], (list, tuple))
):
return _format_marker(marker[0])
if isinstance(marker, list):
inner = (_format_marker(m, first=False) for m in marker)
if first:
return " ".join(inner)
else:
return "(" + " ".join(inner) + ")"
elif isinstance(marker, tuple):
return " ".join([m.serialize() for m in marker])
else:
return marker
_operators: Dict[str, Operator] = {
"in": lambda lhs, rhs: lhs in rhs,
"not in": lambda lhs, rhs: lhs not in rhs,
"<": operator.lt,
"<=": operator.le,
"==": operator.eq,
"!=": operator.ne,
">=": operator.ge,
">": operator.gt,
}
def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
try:
spec = Specifier("".join([op.serialize(), rhs]))
except InvalidSpecifier:
pass
else:
return spec.contains(lhs, prereleases=True)
oper: Optional[Operator] = _operators.get(op.serialize())
if oper is None:
raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
return oper(lhs, rhs)
def _normalize(*values: str, key: str) -> Tuple[str, ...]:
# PEP 685 Comparison of extra names for optional distribution dependencies
# https://peps.python.org/pep-0685/
# > When comparing extra names, tools MUST normalize the names being
# > compared using the semantics outlined in PEP 503 for names
if key == "extra":
return tuple(canonicalize_name(v) for v in values)
# other environment markers don't have such standards
return values
def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool:
groups: List[List[bool]] = [[]]
for marker in markers:
assert isinstance(marker, (list, tuple, str))
if isinstance(marker, list):
groups[-1].append(_evaluate_markers(marker, environment))
elif isinstance(marker, tuple):
lhs, op, rhs = marker
if isinstance(lhs, Variable):
environment_key = lhs.value
lhs_value = environment[environment_key]
rhs_value = rhs.value
else:
lhs_value = lhs.value
environment_key = rhs.value
rhs_value = environment[environment_key]
lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
groups[-1].append(_eval_op(lhs_value, op, rhs_value))
else:
assert marker in ["and", "or"]
if marker == "or":
groups.append([])
return any(all(item) for item in groups)
def format_full_version(info: "sys._version_info") -> str:
version = "{0.major}.{0.minor}.{0.micro}".format(info)
kind = info.releaselevel
if kind != "final":
version += kind[0] + str(info.serial)
return version
def default_environment() -> Dict[str, str]:
iver = format_full_version(sys.implementation.version)
implementation_name = sys.implementation.name
return {
"implementation_name": implementation_name,
"implementation_version": iver,
"os_name": os.name,
"platform_machine": platform.machine(),
"platform_release": platform.release(),
"platform_system": platform.system(),
"platform_version": platform.version(),
"python_full_version": platform.python_version(),
"platform_python_implementation": platform.python_implementation(),
"python_version": ".".join(platform.python_version_tuple()[:2]),
"sys_platform": sys.platform,
}
class Marker:
def __init__(self, marker: str) -> None:
# Note: We create a Marker object without calling this constructor in
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#
# For example, the following expression:
# python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
#
# is parsed into:
# [
# (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
# 'or',
# [
# (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
# 'and',
# (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
# ]
# ]
except ParserSyntaxError as e:
raise InvalidMarker(str(e)) from e
def __str__(self) -> str:
return _format_marker(self._markers)
def __repr__(self) -> str:
return f"<Marker('{self}')>"
def __hash__(self) -> int:
return hash((self.__class__.__name__, str(self)))
def __eq__(self, other: Any) -> bool:
if not isinstance(other, Marker):
return NotImplemented
return str(self) == str(other)
def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool:
"""Evaluate a marker.
Return the boolean from evaluating the given marker against the
environment. environment is an optional argument to override all or
part of the determined environment.
The environment is determined from the current Python process.
"""
current_environment = default_environment()
current_environment["extra"] = ""
if environment is not None:
current_environment.update(environment)
# The API used to allow setting extra to None. We need to handle this
# case for backwards compatibility.
if current_environment["extra"] is None:
current_environment["extra"] = ""
return _evaluate_markers(self._markers, current_environment)
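# --- Editor's usage sketch (illustrative; not part of the original module) ---
# evaluate() overlays the given mapping on default_environment(), so single
# keys can be overridden for "what if" checks.
def _demo_marker() -> None:
    marker = Marker('python_version > "3.6"')
    print(marker.evaluate())  # depends on the running interpreter
    print(marker.evaluate(environment={"python_version": "3.6"}))  # False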

@@ -0,0 +1,408 @@
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import Dict, List, Optional, Tuple, Union, cast
if sys.version_info >= (3, 8): # pragma: no cover
from typing import TypedDict
else: # pragma: no cover
if typing.TYPE_CHECKING:
from typing_extensions import TypedDict
else:
try:
from typing_extensions import TypedDict
except ImportError:
class TypedDict:
def __init_subclass__(*_args, **_kwargs):
pass
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.
Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
field has a key with a plural name.
Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.
"""
# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: List[str]
summary: str
description: str
keywords: List[str]
home_page: str
author: str
author_email: str
license: str
# Metadata 1.1 - PEP 314
supported_platforms: List[str]
download_url: str
classifiers: List[str]
requires: List[str]
provides: List[str]
obsoletes: List[str]
# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: List[str]
provides_dist: List[str]
obsoletes_dist: List[str]
requires_python: str
requires_external: List[str]
project_urls: Dict[str, str]
# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emitting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.
# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: List[str]
# Metadata 2.2 - PEP 643
dynamic: List[str]
# Metadata 2.3 - PEP 685
# No new fields were added in PEP 685, just some edge cases were
# tightened up to provide better interoperability.
_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}
_LIST_STRING_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}
def _parse_keywords(data: str) -> List[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: List[str]) -> Dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about is data that does
# not have a ',' at all to split the label from the value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in an accumulating dict.
#
# The other potential issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer for what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
"""Parse a distribution's metadata.
This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.
The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.
"""
raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
unparsed: Dict[str, List[str]] = {}
if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()
# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name)
# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all() ),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))
# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
# That should be fine since, if mojibake happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
# The Header object stores its data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: List[Tuple[bytes, Optional[str]]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))
# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)
# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation, we've encountered a key that
# we don't know what it means, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue
# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_STRING_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so it'll just
# throw it in our unparseable data and move on.
else:
unparsed[name] = value
# We need to support getting the Description from the message payload in
# addition to getting it from the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description, if so then both
# it, and this body move to unparseable.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload
# We need to cast our `raw` to a RawMetadata, because a TypedDict only
# supports literal key names, but we're computing our key names on purpose.
# The way this function is implemented ensures our `TypedDict` can only have
# valid key names anyway.
return cast(RawMetadata, raw), unparsed
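# --- Editor's usage sketch (illustrative; not part of the original module) ---
# A minimal METADATA document: recognized fields land in RawMetadata, the body
# becomes the description, and unknown fields end up in the unparsed dict.
def _demo_parse_email() -> None:
    source = (
        "Metadata-Version: 2.1\n"
        "Name: example\n"
        "Version: 1.0\n"
        "Unknown-Field: x\n"
        "\n"
        "A long description.\n"
    )
    raw, unparsed = parse_email(source)
    print(raw["name"], raw["version"])  # example 1.0
    print(raw["description"])           # A long description.
    print(unparsed)                     # {'unknown-field': ['x']}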

@@ -0,0 +1,95 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import urllib.parse
from typing import Any, List, Optional, Set
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
class InvalidRequirement(ValueError):
"""
An invalid requirement was found; users should refer to PEP 508.
"""
class Requirement:
"""Parse a requirement.
Parse a given requirement string into its parts, such as name, specifier,
URL, and extras. Raises InvalidRequirement on a badly-formed requirement
string.
"""
# TODO: Can we test whether something is contained within a requirement?
# If so how do we do that? Do we need to test against the _name_ of
# the thing as well as the version? What about the markers?
# TODO: Can we normalize the name and extra name?
def __init__(self, requirement_string: str) -> None:
try:
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e
self.name: str = parsed.name
if parsed.url:
parsed_url = urllib.parse.urlparse(parsed.url)
if parsed_url.scheme == "file":
if urllib.parse.urlunparse(parsed_url) != parsed.url:
raise InvalidRequirement("Invalid URL given")
elif not (parsed_url.scheme and parsed_url.netloc) or (
not parsed_url.scheme and not parsed_url.netloc
):
raise InvalidRequirement(f"Invalid URL: {parsed.url}")
self.url: Optional[str] = parsed.url
else:
self.url = None
self.extras: Set[str] = set(parsed.extras if parsed.extras else [])
self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
self.marker: Optional[Marker] = None
if parsed.marker is not None:
self.marker = Marker.__new__(Marker)
self.marker._markers = _normalize_extra_values(parsed.marker)
def __str__(self) -> str:
parts: List[str] = [self.name]
if self.extras:
formatted_extras = ",".join(sorted(self.extras))
parts.append(f"[{formatted_extras}]")
if self.specifier:
parts.append(str(self.specifier))
if self.url:
parts.append(f"@ {self.url}")
if self.marker:
parts.append(" ")
if self.marker:
parts.append(f"; {self.marker}")
return "".join(parts)
def __repr__(self) -> str:
return f"<Requirement('{self}')>"
def __hash__(self) -> int:
return hash((self.__class__.__name__, str(self)))
def __eq__(self, other: Any) -> bool:
if not isinstance(other, Requirement):
return NotImplemented
return (
self.name == other.name
and self.extras == other.extras
and self.specifier == other.specifier
and self.url == other.url
and self.marker == other.marker
)
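# --- Editor's usage sketch (illustrative; not part of the original module) ---
# A requirement string decomposes into the attributes set by __init__, and
# str() reassembles a canonical form.
def _demo_requirement() -> None:
    req = Requirement("example[extra1,extra2] >=1.0 ; python_version < '3.8'")
    print(req.name, sorted(req.extras))  # example ['extra1', 'extra2']
    print(req.specifier, req.marker)     # >=1.0 python_version < "3.8"
    print(req)  # e.g. example[extra1,extra2]>=1.0; python_version < "3.8"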

File diff suppressed because it is too large.

@@ -0,0 +1,546 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import logging
import platform
import subprocess
import sys
import sysconfig
from importlib.machinery import EXTENSION_SUFFIXES
from typing import (
Dict,
FrozenSet,
Iterable,
Iterator,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from . import _manylinux, _musllinux
logger = logging.getLogger(__name__)
PythonVersion = Sequence[int]
MacVersion = Tuple[int, int]
INTERPRETER_SHORT_NAMES: Dict[str, str] = {
"python": "py", # Generic.
"cpython": "cp",
"pypy": "pp",
"ironpython": "ip",
"jython": "jy",
}
_32_BIT_INTERPRETER = sys.maxsize <= 2**32
class Tag:
"""
A representation of the tag triple for a wheel.
Instances are considered immutable and thus are hashable. Equality checking
is also supported.
"""
__slots__ = ["_interpreter", "_abi", "_platform", "_hash"]
def __init__(self, interpreter: str, abi: str, platform: str) -> None:
self._interpreter = interpreter.lower()
self._abi = abi.lower()
self._platform = platform.lower()
# The __hash__ of every single element in a Set[Tag] will be evaluated each time
# that a set calls its `.isdisjoint()` method, which may be called hundreds of
# times when scanning a page of links for packages with tags matching that
# Set[Tag]. Pre-computing the value here produces significant speedups for
# downstream consumers.
self._hash = hash((self._interpreter, self._abi, self._platform))
@property
def interpreter(self) -> str:
return self._interpreter
@property
def abi(self) -> str:
return self._abi
@property
def platform(self) -> str:
return self._platform
def __eq__(self, other: object) -> bool:
if not isinstance(other, Tag):
return NotImplemented
return (
(self._hash == other._hash) # Short-circuit ASAP for perf reasons.
and (self._platform == other._platform)
and (self._abi == other._abi)
and (self._interpreter == other._interpreter)
)
def __hash__(self) -> int:
return self._hash
def __str__(self) -> str:
return f"{self._interpreter}-{self._abi}-{self._platform}"
def __repr__(self) -> str:
return f"<{self} @ {id(self)}>"
def parse_tag(tag: str) -> FrozenSet[Tag]:
"""
Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances.
Returning a set is required due to the possibility that the tag is a
compressed tag set.
"""
tags = set()
interpreters, abis, platforms = tag.split("-")
for interpreter in interpreters.split("."):
for abi in abis.split("."):
for platform_ in platforms.split("."):
tags.add(Tag(interpreter, abi, platform_))
return frozenset(tags)
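# Editor's illustration (not part of the original module): a compressed tag
# set expands into the cross product of its dot-separated components.
def _demo_parse_tag() -> None:
    tags = parse_tag("cp38.cp39-abi3-manylinux1_x86_64")
    assert tags == {
        Tag("cp38", "abi3", "manylinux1_x86_64"),
        Tag("cp39", "abi3", "manylinux1_x86_64"),
    }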
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value: Union[int, str, None] = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
)
return value
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
def _abi3_applies(python_version: PythonVersion) -> bool:
"""
Determine if the Python version supports abi3.
PEP 384 was first implemented in Python 3.2.
"""
return len(python_version) > 1 and tuple(python_version) >= (3, 2)
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]:
py_version = tuple(py_version) # To allow for version comparison.
abis = []
version = _version_nodot(py_version[:2])
debug = pymalloc = ucs4 = ""
with_debug = _get_config_var("Py_DEBUG", warn)
has_refcount = hasattr(sys, "gettotalrefcount")
# Windows doesn't set Py_DEBUG, so checking for support of debug-compiled
# extension modules is the best option.
# https://github.com/pypa/pip/issues/3383#issuecomment-173267692
has_ext = "_d.pyd" in EXTENSION_SUFFIXES
if with_debug or (with_debug is None and (has_refcount or has_ext)):
debug = "d"
if py_version < (3, 8):
with_pymalloc = _get_config_var("WITH_PYMALLOC", warn)
if with_pymalloc or with_pymalloc is None:
pymalloc = "m"
if py_version < (3, 3):
unicode_size = _get_config_var("Py_UNICODE_SIZE", warn)
if unicode_size == 4 or (
unicode_size is None and sys.maxunicode == 0x10FFFF
):
ucs4 = "u"
elif debug:
# Debug builds can also load "normal" extension modules.
# We can also assume no UCS-4 or pymalloc requirement.
abis.append(f"cp{version}")
abis.insert(
0,
"cp{version}{debug}{pymalloc}{ucs4}".format(
version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4
),
)
return abis
def cpython_tags(
python_version: Optional[PythonVersion] = None,
abis: Optional[Iterable[str]] = None,
platforms: Optional[Iterable[str]] = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
"""
Yields the tags for a CPython interpreter.
The tags consist of:
- cp<python_version>-<abi>-<platform>
- cp<python_version>-abi3-<platform>
- cp<python_version>-none-<platform>
- cp<less than python_version>-abi3-<platform> # Older Python versions down to 3.2.
If python_version only specifies a major version then user-provided ABIs and
the 'none' ABI tag will be used.
If 'abi3' or 'none' are specified in 'abis' then they will be yielded at
their normal position and not at the beginning.
"""
if not python_version:
python_version = sys.version_info[:2]
interpreter = f"cp{_version_nodot(python_version[:2])}"
if abis is None:
if len(python_version) > 1:
abis = _cpython_abis(python_version, warn)
else:
abis = []
abis = list(abis)
# 'abi3' and 'none' are explicitly handled later.
for explicit_abi in ("abi3", "none"):
try:
abis.remove(explicit_abi)
except ValueError:
pass
platforms = list(platforms or platform_tags())
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
if _abi3_applies(python_version):
yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms)
yield from (Tag(interpreter, "none", platform_) for platform_ in platforms)
if _abi3_applies(python_version):
for minor_version in range(python_version[1] - 1, 1, -1):
for platform_ in platforms:
interpreter = "cp{version}".format(
version=_version_nodot((python_version[0], minor_version))
)
yield Tag(interpreter, "abi3", platform_)
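# --- Editor's usage sketch (illustrative; not part of the original module) ---
# Pinning every argument makes the documented ordering visible: the specific
# ABI first, then abi3 and none, then abi3 tags for older minors down to cp32.
def _demo_cpython_tags() -> None:
    for tag in cpython_tags(
        python_version=(3, 9), abis=["cp39"], platforms=["manylinux1_x86_64"]
    ):
        print(tag)  # cp39-cp39-..., cp39-abi3-..., cp39-none-..., cp38-abi3-..., ...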
def _generic_abi() -> List[str]:
"""
Return the ABI tag based on EXT_SUFFIX.
"""
# The following are examples of `EXT_SUFFIX`.
# We want to keep the parts which are related to the ABI and remove the
# parts which are related to the platform:
# - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310
# - mac: '.cpython-310-darwin.so' => cp310
# - win: '.cp310-win_amd64.pyd' => cp310
# - win: '.pyd' => cp37 (uses _cpython_abis())
# - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73
# - graalpy: '.graalpy-38-native-x86_64-darwin.dylib'
# => graalpy_38_native
ext_suffix = _get_config_var("EXT_SUFFIX", warn=True)
if not isinstance(ext_suffix, str) or ext_suffix[0] != ".":
raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')")
parts = ext_suffix.split(".")
if len(parts) < 3:
        # CPython 3.7 and earlier use ".pyd" on Windows.
return _cpython_abis(sys.version_info[:2])
soabi = parts[1]
if soabi.startswith("cpython"):
# non-windows
abi = "cp" + soabi.split("-")[1]
elif soabi.startswith("cp"):
# windows
abi = soabi.split("-")[0]
elif soabi.startswith("pypy"):
abi = "-".join(soabi.split("-")[:2])
elif soabi.startswith("graalpy"):
abi = "-".join(soabi.split("-")[:3])
elif soabi:
# pyston, ironpython, others?
abi = soabi
else:
return []
return [_normalize_string(abi)]
def generic_tags(
interpreter: Optional[str] = None,
abis: Optional[Iterable[str]] = None,
platforms: Optional[Iterable[str]] = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
"""
Yields the tags for a generic interpreter.
The tags consist of:
- <interpreter>-<abi>-<platform>
The "none" ABI will be added if it was not explicitly provided.
"""
if not interpreter:
interp_name = interpreter_name()
interp_version = interpreter_version(warn=warn)
interpreter = "".join([interp_name, interp_version])
if abis is None:
abis = _generic_abi()
else:
abis = list(abis)
platforms = list(platforms or platform_tags())
if "none" not in abis:
abis.append("none")
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]:
"""
Yields Python versions in descending order.
After the latest version, the major-only version will be yielded, and then
all previous versions of that major version.
"""
if len(py_version) > 1:
yield f"py{_version_nodot(py_version[:2])}"
yield f"py{py_version[0]}"
if len(py_version) > 1:
for minor in range(py_version[1] - 1, -1, -1):
yield f"py{_version_nodot((py_version[0], minor))}"
def compatible_tags(
python_version: Optional[PythonVersion] = None,
interpreter: Optional[str] = None,
platforms: Optional[Iterable[str]] = None,
) -> Iterator[Tag]:
"""
Yields the sequence of tags that are compatible with a specific version of Python.
The tags consist of:
- py*-none-<platform>
- <interpreter>-none-any # ... if `interpreter` is provided.
- py*-none-any
"""
if not python_version:
python_version = sys.version_info[:2]
platforms = list(platforms or platform_tags())
for version in _py_interpreter_range(python_version):
for platform_ in platforms:
yield Tag(version, "none", platform_)
if interpreter:
yield Tag(interpreter, "none", "any")
for version in _py_interpreter_range(python_version):
yield Tag(version, "none", "any")
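
# Illustrative usage (a sketch; the platform string is only an example):
# >>> tags = list(compatible_tags((3, 9), "cp39", ["manylinux1_x86_64"]))
# >>> [str(t) for t in tags[:2]], str(tags[11])
# (['py39-none-manylinux1_x86_64', 'py3-none-manylinux1_x86_64'], 'cp39-none-any')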
def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str:
if not is_32bit:
return arch
if arch.startswith("ppc"):
return "ppc"
return "i386"
def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]:
formats = [cpu_arch]
if cpu_arch == "x86_64":
if version < (10, 4):
return []
formats.extend(["intel", "fat64", "fat32"])
elif cpu_arch == "i386":
if version < (10, 4):
return []
formats.extend(["intel", "fat32", "fat"])
elif cpu_arch == "ppc64":
# TODO: Need to care about 32-bit PPC for ppc64 through 10.2?
if version > (10, 5) or version < (10, 4):
return []
formats.append("fat64")
elif cpu_arch == "ppc":
if version > (10, 6):
return []
formats.extend(["fat32", "fat"])
if cpu_arch in {"arm64", "x86_64"}:
formats.append("universal2")
if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}:
formats.append("universal")
return formats
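
# Example (deterministic): an x86_64 binary targeting macOS 10.15 may ship
# in any of these formats, from most to least specific:
# >>> _mac_binary_formats((10, 15), "x86_64")
# ['x86_64', 'intel', 'fat64', 'fat32', 'universal2', 'universal']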
def mac_platforms(
version: Optional[MacVersion] = None, arch: Optional[str] = None
) -> Iterator[str]:
"""
Yields the platform tags for a macOS system.
The `version` parameter is a two-item tuple specifying the macOS version to
generate platform tags for. The `arch` parameter is the CPU architecture to
generate platform tags for. Both parameters default to the appropriate value
for the current system.
"""
version_str, _, cpu_arch = platform.mac_ver()
if version is None:
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
if version == (10, 16):
# When built against an older macOS SDK, Python will report macOS 10.16
# instead of the real version.
version_str = subprocess.run(
[
sys.executable,
"-sS",
"-c",
"import platform; print(platform.mac_ver()[0])",
],
check=True,
env={"SYSTEM_VERSION_COMPAT": "0"},
stdout=subprocess.PIPE,
universal_newlines=True,
).stdout
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
if arch is None:
arch = _mac_arch(cpu_arch)
    if (10, 0) <= version < (11, 0):
# Prior to Mac OS 11, each yearly release of Mac OS bumped the
# "minor" version number. The major version was always 10.
for minor_version in range(version[1], -1, -1):
compat_version = 10, minor_version
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=10, minor=minor_version, binary_format=binary_format
)
if version >= (11, 0):
# Starting with Mac OS 11, each yearly release bumps the major version
# number. The minor versions are now the midyear updates.
for major_version in range(version[0], 10, -1):
compat_version = major_version, 0
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=major_version, minor=0, binary_format=binary_format
)
if version >= (11, 0):
# Mac OS 11 on x86_64 is compatible with binaries from previous releases.
# Arm64 support was introduced in 11.0, so no Arm binaries from previous
# releases exist.
#
# However, the "universal2" binary format can have a
# macOS version earlier than 11.0 when the x86_64 part of the binary supports
# that version of macOS.
if arch == "x86_64":
for minor_version in range(16, 3, -1):
compat_version = 10, minor_version
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=compat_version[0],
minor=compat_version[1],
binary_format=binary_format,
)
else:
for minor_version in range(16, 3, -1):
compat_version = 10, minor_version
binary_format = "universal2"
yield "macosx_{major}_{minor}_{binary_format}".format(
major=compat_version[0],
minor=compat_version[1],
binary_format=binary_format,
)
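
# Illustrative usage (explicit version/arch keep the output deterministic):
# >>> [p for p in mac_platforms((10, 15), "x86_64")][:3]
# ['macosx_10_15_x86_64', 'macosx_10_15_intel', 'macosx_10_15_fat64']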
def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]:
linux = _normalize_string(sysconfig.get_platform())
if is_32bit:
if linux == "linux_x86_64":
linux = "linux_i686"
elif linux == "linux_aarch64":
linux = "linux_armv7l"
_, arch = linux.split("_", 1)
yield from _manylinux.platform_tags(linux, arch)
yield from _musllinux.platform_tags(arch)
yield linux
def _generic_platforms() -> Iterator[str]:
yield _normalize_string(sysconfig.get_platform())
def platform_tags() -> Iterator[str]:
"""
Provides the platform tags for this installation.
"""
if platform.system() == "Darwin":
return mac_platforms()
elif platform.system() == "Linux":
return _linux_platforms()
else:
return _generic_platforms()
def interpreter_name() -> str:
"""
Returns the name of the running interpreter.
Some implementations have a reserved, two-letter abbreviation which will
be returned when appropriate.
"""
name = sys.implementation.name
return INTERPRETER_SHORT_NAMES.get(name) or name
def interpreter_version(*, warn: bool = False) -> str:
"""
Returns the version of the running interpreter.
"""
version = _get_config_var("py_version_nodot", warn=warn)
if version:
version = str(version)
else:
version = _version_nodot(sys.version_info[:2])
return version
def _version_nodot(version: PythonVersion) -> str:
return "".join(map(str, version))
def sys_tags(*, warn: bool = False) -> Iterator[Tag]:
"""
Returns the sequence of tag triples for the running interpreter.
The order of the sequence corresponds to priority order for the
interpreter, from most to least important.
"""
interp_name = interpreter_name()
if interp_name == "cp":
yield from cpython_tags(warn=warn)
else:
yield from generic_tags()
if interp_name == "pp":
interp = "pp3"
elif interp_name == "cp":
interp = "cp" + interpreter_version(warn=warn)
else:
interp = None
yield from compatible_tags(interpreter=interp)
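
# Illustrative usage (the first tag is the most specific one for the running
# interpreter; the value shown is what a manylinux CPython 3.11 build might
# produce, and is entirely machine-specific):
# >>> str(next(sys_tags()))
# 'cp311-cp311-manylinux_2_17_x86_64'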

View file

@ -0,0 +1,141 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import re
from typing import FrozenSet, NewType, Tuple, Union, cast
from .tags import Tag, parse_tag
from .version import InvalidVersion, Version
BuildTag = Union[Tuple[()], Tuple[int, str]]
NormalizedName = NewType("NormalizedName", str)
class InvalidWheelFilename(ValueError):
"""
    An invalid wheel filename was found; users should refer to PEP 427.
"""
class InvalidSdistFilename(ValueError):
"""
    An invalid sdist filename was found; users should refer to the packaging user guide.
"""
_canonicalize_regex = re.compile(r"[-_.]+")
# PEP 427: The build number must start with a digit.
_build_tag_regex = re.compile(r"(\d+)(.*)")
def canonicalize_name(name: str) -> NormalizedName:
# This is taken from PEP 503.
value = _canonicalize_regex.sub("-", name).lower()
return cast(NormalizedName, value)
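
# Examples: runs of '-', '_' and '.' collapse to a single '-' and the result
# is lowercased, per PEP 503:
# >>> canonicalize_name("Flask_SQLAlchemy")
# 'flask-sqlalchemy'
# >>> canonicalize_name("zope.interface")
# 'zope-interface'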
def canonicalize_version(
version: Union[Version, str], *, strip_trailing_zero: bool = True
) -> str:
"""
    This is very similar to Version.__str__, but has one subtle difference in
    the way it handles the release segment.
"""
if isinstance(version, str):
try:
parsed = Version(version)
except InvalidVersion:
# Legacy versions cannot be normalized
return version
else:
parsed = version
parts = []
# Epoch
if parsed.epoch != 0:
parts.append(f"{parsed.epoch}!")
# Release segment
release_segment = ".".join(str(x) for x in parsed.release)
if strip_trailing_zero:
# NB: This strips trailing '.0's to normalize
release_segment = re.sub(r"(\.0)+$", "", release_segment)
parts.append(release_segment)
# Pre-release
if parsed.pre is not None:
parts.append("".join(str(x) for x in parsed.pre))
# Post-release
if parsed.post is not None:
parts.append(f".post{parsed.post}")
# Development release
if parsed.dev is not None:
parts.append(f".dev{parsed.dev}")
# Local version segment
if parsed.local is not None:
parts.append(f"+{parsed.local}")
return "".join(parts)
def parse_wheel_filename(
filename: str,
) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]:
if not filename.endswith(".whl"):
raise InvalidWheelFilename(
f"Invalid wheel filename (extension must be '.whl'): {filename}"
)
filename = filename[:-4]
dashes = filename.count("-")
if dashes not in (4, 5):
raise InvalidWheelFilename(
f"Invalid wheel filename (wrong number of parts): {filename}"
)
parts = filename.split("-", dashes - 2)
name_part = parts[0]
# See PEP 427 for the rules on escaping the project name
if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None:
raise InvalidWheelFilename(f"Invalid project name: {filename}")
name = canonicalize_name(name_part)
version = Version(parts[1])
if dashes == 5:
build_part = parts[2]
build_match = _build_tag_regex.match(build_part)
if build_match is None:
raise InvalidWheelFilename(
f"Invalid build number: {build_part} in '{filename}'"
)
build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2)))
else:
build = ()
tags = parse_tag(parts[-1])
return (name, version, build, tags)
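
# Illustrative usage ("pip-23.1-py3-none-any.whl" is just a sample name):
# >>> name, ver, build, tags = parse_wheel_filename("pip-23.1-py3-none-any.whl")
# >>> name, str(ver), build, {str(t) for t in tags}
# ('pip', '23.1', (), {'py3-none-any'})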
def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]:
if filename.endswith(".tar.gz"):
file_stem = filename[: -len(".tar.gz")]
elif filename.endswith(".zip"):
file_stem = filename[: -len(".zip")]
else:
raise InvalidSdistFilename(
f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):"
f" {filename}"
)
# We are requiring a PEP 440 version, which cannot contain dashes,
# so we split on the last dash.
name_part, sep, version_part = file_stem.rpartition("-")
if not sep:
raise InvalidSdistFilename(f"Invalid sdist filename: {filename}")
name = canonicalize_name(name_part)
version = Version(version_part)
return (name, version)
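
# Illustrative usage ("pip-23.1.tar.gz" is just a sample name):
# >>> parse_sdist_filename("pip-23.1.tar.gz")
# ('pip', <Version('23.1')>)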

View file

@ -0,0 +1,564 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
"""
.. testsetup::
from packaging.version import parse, Version
"""
import collections
import itertools
import re
from typing import Any, Callable, Optional, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"]
InfiniteTypes = Union[InfinityType, NegativeInfinityType]
PrePostDevType = Union[InfiniteTypes, Tuple[str, int]]
SubLocalType = Union[InfiniteTypes, int, str]
LocalType = Union[
NegativeInfinityType,
Tuple[
Union[
SubLocalType,
Tuple[SubLocalType, str],
Tuple[NegativeInfinityType, SubLocalType],
],
...,
],
]
CmpKey = Tuple[
int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType
]
VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool]
_Version = collections.namedtuple(
"_Version", ["epoch", "release", "dev", "pre", "post", "local"]
)
def parse(version: str) -> "Version":
"""Parse the given version string.
>>> parse('1.0.dev1')
<Version('1.0.dev1')>
:param version: The version string to parse.
:raises InvalidVersion: When the version string is not a valid version.
"""
return Version(version)
class InvalidVersion(ValueError):
"""Raised when a version string is not a valid version.
>>> Version("invalid")
Traceback (most recent call last):
...
packaging.version.InvalidVersion: Invalid version: 'invalid'
"""
class _BaseVersion:
_key: Tuple[Any, ...]
def __hash__(self) -> int:
return hash(self._key)
    # Please keep the duplicated `isinstance` check in the six comparison
    # methods below unless you find a way to avoid it without adding the
    # overhead of extra function calls.
def __lt__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key < other._key
def __le__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key <= other._key
def __eq__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key == other._key
def __ge__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key >= other._key
def __gt__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key > other._key
def __ne__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key != other._key
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
_VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
(?P<post> # post release
(?:-(?P<post_n1>[0-9]+))
|
(?:
[-_\.]?
(?P<post_l>post|rev|r)
[-_\.]?
(?P<post_n2>[0-9]+)?
)
)?
(?P<dev> # dev release
[-_\.]?
(?P<dev_l>dev)
[-_\.]?
(?P<dev_n>[0-9]+)?
)?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
VERSION_PATTERN = _VERSION_PATTERN
"""
A string containing the regular expression used to match a valid version.
The pattern is not anchored at either end, and is intended for embedding in larger
expressions (for example, matching a version number as part of a file name). The
regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
flags set.
:meta hide-value:
"""
class Version(_BaseVersion):
"""This class abstracts handling of a project's versions.
A :class:`Version` instance is comparison aware and can be compared and
sorted using the standard Python interfaces.
>>> v1 = Version("1.0a5")
>>> v2 = Version("1.0")
>>> v1
<Version('1.0a5')>
>>> v2
<Version('1.0')>
>>> v1 < v2
True
>>> v1 == v2
False
>>> v1 > v2
False
>>> v1 >= v2
False
>>> v1 <= v2
True
"""
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None:
"""Initialize a Version object.
:param version:
The string representation of a version which will be parsed and normalized
before use.
:raises InvalidVersion:
If the ``version`` does not conform to PEP 440 in any way then this
exception will be raised.
"""
# Validate the version and parse it into pieces
match = self._regex.search(version)
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")
# Store the parsed out pieces of the version
self._version = _Version(
epoch=int(match.group("epoch")) if match.group("epoch") else 0,
release=tuple(int(i) for i in match.group("release").split(".")),
pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
post=_parse_letter_version(
match.group("post_l"), match.group("post_n1") or match.group("post_n2")
),
dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
local=_parse_local_version(match.group("local")),
)
# Generate a key which will be used for sorting
self._key = _cmpkey(
self._version.epoch,
self._version.release,
self._version.pre,
self._version.post,
self._version.dev,
self._version.local,
)
def __repr__(self) -> str:
"""A representation of the Version that shows all internal state.
>>> Version('1.0.0')
<Version('1.0.0')>
"""
return f"<Version('{self}')>"
def __str__(self) -> str:
"""A string representation of the version that can be rounded-tripped.
>>> str(Version("1.0a5"))
'1.0a5'
"""
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
# Pre-release
if self.pre is not None:
parts.append("".join(str(x) for x in self.pre))
# Post-release
if self.post is not None:
parts.append(f".post{self.post}")
# Development release
if self.dev is not None:
parts.append(f".dev{self.dev}")
# Local version segment
if self.local is not None:
parts.append(f"+{self.local}")
return "".join(parts)
@property
def epoch(self) -> int:
"""The epoch of the version.
>>> Version("2.0.0").epoch
0
>>> Version("1!2.0.0").epoch
1
"""
_epoch: int = self._version.epoch
return _epoch
@property
def release(self) -> Tuple[int, ...]:
"""The components of the "release" segment of the version.
>>> Version("1.2.3").release
(1, 2, 3)
>>> Version("2.0.0").release
(2, 0, 0)
>>> Version("1!2.0.0.post0").release
(2, 0, 0)
Includes trailing zeroes but not the epoch or any pre-release / development /
post-release suffixes.
"""
_release: Tuple[int, ...] = self._version.release
return _release
@property
def pre(self) -> Optional[Tuple[str, int]]:
"""The pre-release segment of the version.
>>> print(Version("1.2.3").pre)
None
>>> Version("1.2.3a1").pre
('a', 1)
>>> Version("1.2.3b1").pre
('b', 1)
>>> Version("1.2.3rc1").pre
('rc', 1)
"""
_pre: Optional[Tuple[str, int]] = self._version.pre
return _pre
@property
def post(self) -> Optional[int]:
"""The post-release number of the version.
>>> print(Version("1.2.3").post)
None
>>> Version("1.2.3.post1").post
1
"""
return self._version.post[1] if self._version.post else None
@property
def dev(self) -> Optional[int]:
"""The development number of the version.
>>> print(Version("1.2.3").dev)
None
>>> Version("1.2.3.dev1").dev
1
"""
return self._version.dev[1] if self._version.dev else None
@property
def local(self) -> Optional[str]:
"""The local version segment of the version.
>>> print(Version("1.2.3").local)
None
>>> Version("1.2.3+abc").local
'abc'
"""
if self._version.local:
return ".".join(str(x) for x in self._version.local)
else:
return None
@property
def public(self) -> str:
"""The public portion of the version.
>>> Version("1.2.3").public
'1.2.3'
>>> Version("1.2.3+abc").public
'1.2.3'
>>> Version("1.2.3+abc.dev1").public
'1.2.3'
"""
return str(self).split("+", 1)[0]
@property
def base_version(self) -> str:
"""The "base version" of the version.
>>> Version("1.2.3").base_version
'1.2.3'
>>> Version("1.2.3+abc").base_version
'1.2.3'
>>> Version("1!1.2.3+abc.dev1").base_version
'1!1.2.3'
The "base version" is the public version of the project without any pre or post
release markers.
"""
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
return "".join(parts)
@property
def is_prerelease(self) -> bool:
"""Whether this version is a pre-release.
>>> Version("1.2.3").is_prerelease
False
>>> Version("1.2.3a1").is_prerelease
True
>>> Version("1.2.3b1").is_prerelease
True
>>> Version("1.2.3rc1").is_prerelease
True
>>> Version("1.2.3dev1").is_prerelease
True
"""
return self.dev is not None or self.pre is not None
@property
def is_postrelease(self) -> bool:
"""Whether this version is a post-release.
>>> Version("1.2.3").is_postrelease
False
>>> Version("1.2.3.post1").is_postrelease
True
"""
return self.post is not None
@property
def is_devrelease(self) -> bool:
"""Whether this version is a development release.
>>> Version("1.2.3").is_devrelease
False
>>> Version("1.2.3.dev1").is_devrelease
True
"""
return self.dev is not None
@property
def major(self) -> int:
"""The first item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").major
1
"""
return self.release[0] if len(self.release) >= 1 else 0
@property
def minor(self) -> int:
"""The second item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").minor
2
>>> Version("1").minor
0
"""
return self.release[1] if len(self.release) >= 2 else 0
@property
def micro(self) -> int:
"""The third item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").micro
3
>>> Version("1").micro
0
"""
return self.release[2] if len(self.release) >= 3 else 0
def _parse_letter_version(
letter: str, number: Union[str, bytes, SupportsInt]
) -> Optional[Tuple[str, int]]:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
if number is None:
number = 0
# We normalize any letters to their lower case form
letter = letter.lower()
# We consider some words to be alternate spellings of other words and
# in those cases we want to normalize the spellings to our preferred
# spelling.
if letter == "alpha":
letter = "a"
elif letter == "beta":
letter = "b"
elif letter in ["c", "pre", "preview"]:
letter = "rc"
elif letter in ["rev", "r"]:
letter = "post"
return letter, int(number)
if not letter and number:
        # We assume that if we are given a number but no letter, then this is
        # using the implicit post release syntax (e.g. 1.0-1)
letter = "post"
return letter, int(number)
return None
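
# Examples: a bare pre-release word gets an implicit number 0 and a
# normalized spelling; a bare number means an implicit post release:
# >>> _parse_letter_version("alpha", None)
# ('a', 0)
# >>> _parse_letter_version("", "2")
# ('post', 2)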
_local_version_separators = re.compile(r"[\._-]")
def _parse_local_version(local: str) -> Optional[LocalType]:
"""
Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
"""
if local is not None:
return tuple(
part.lower() if not part.isdigit() else int(part)
for part in _local_version_separators.split(local)
)
return None
def _cmpkey(
epoch: int,
release: Tuple[int, ...],
pre: Optional[Tuple[str, int]],
post: Optional[Tuple[str, int]],
dev: Optional[Tuple[str, int]],
local: Optional[Tuple[SubLocalType]],
) -> CmpKey:
    # When we compare a release version, we want to compare it with all of the
    # trailing zeros removed. So we'll reverse the list, drop all of the now
    # leading zeros until we come to something non-zero, then re-reverse the
    # remainder back into the correct order, and use that tuple as our sorting
    # key.
_release = tuple(
reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
)
# We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
# We'll do this by abusing the pre segment, but we _only_ want to do this
# if there is not a pre or a post segment. If we have one of those then
# the normal sorting rules will handle this case correctly.
if pre is None and post is None and dev is not None:
_pre: PrePostDevType = NegativeInfinity
# Versions without a pre-release (except as noted above) should sort after
# those with one.
elif pre is None:
_pre = Infinity
else:
_pre = pre
# Versions without a post segment should sort before those with one.
if post is None:
_post: PrePostDevType = NegativeInfinity
else:
_post = post
# Versions without a development segment should sort after those with one.
if dev is None:
_dev: PrePostDevType = Infinity
else:
_dev = dev
if local is None:
# Versions without a local segment should sort before those with one.
_local: LocalType = NegativeInfinity
else:
# Versions with a local segment need that segment parsed to implement
# the sorting rules in PEP440.
# - Alpha numeric segments sort before numeric segments
# - Alpha numeric segments sort lexicographically
# - Numeric segments sort numerically
# - Shorter versions sort before longer versions when the prefixes
# match exactly
_local = tuple(
(i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
)
return epoch, _release, _pre, _post, _dev, _local
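
# The net effect of the key, as a quick ordering check:
# >>> [str(v) for v in sorted(Version(s) for s in ["1.0.post1", "1.0", "1.0a1", "1.0.dev0"])]
# ['1.0.dev0', '1.0a1', '1.0', '1.0.post1']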