1
0
Fork 0
forked from bton/matekasse

tests versuch 2

This commit is contained in:
2000-Trek 2023-07-28 23:30:45 +02:00
parent fdf385fe06
commit c88f7df83a
2363 changed files with 408191 additions and 0 deletions

View file

@ -0,0 +1,608 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2005-2010 ActiveState Software Inc.
# Copyright (c) 2013 Eddy Petrișor
"""Utilities for determining application-specific dirs.
See <http://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
__version_info__ = (1, 4, 3)
__version__ = '.'.join(map(str, __version_info__))
import sys
import os
PY3 = sys.version_info[0] == 3
if PY3:
unicode = str
if sys.platform.startswith('java'):
import platform
os_name = platform.java_ver()[3][0]
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
system = 'win32'
elif os_name.startswith('Mac'): # "Mac OS X", etc.
system = 'darwin'
else: # "Linux", "SunOS", "FreeBSD", etc.
# Setting this to "linux2" is not ideal, but only Windows or Mac
# are actually checked for and the rest of the module expects
# *sys.platform* style strings.
system = 'linux2'
else:
system = sys.platform
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user data directories are:
Mac OS X: ~/Library/Application Support/<AppName>
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
That means, by default "~/.local/share/<AppName>".
"""
if system == "win32":
if appauthor is None:
appauthor = appname
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
path = os.path.normpath(_get_win_folder(const))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
elif system == 'darwin':
path = os.path.expanduser('~/Library/Application Support/')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
r"""Return full path to the user-shared data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"multipath" is an optional parameter only applicable to *nix
which indicates that the entire list of data dirs should be
returned. By default, the first item from XDG_DATA_DIRS is
returned, or '/usr/local/share/<AppName>',
if XDG_DATA_DIRS is not set
Typical site data directories are:
Mac OS X: /Library/Application Support/<AppName>
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
For Unix, this is using the $XDG_DATA_DIRS[0] default.
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
elif system == 'darwin':
path = os.path.expanduser('/Library/Application Support')
if appname:
path = os.path.join(path, appname)
else:
# XDG default for $XDG_DATA_DIRS
# only first, if multipath is False
path = os.getenv('XDG_DATA_DIRS',
os.pathsep.join(['/usr/local/share', '/usr/share']))
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
if appname:
if version:
appname = os.path.join(appname, version)
pathlist = [os.sep.join([x, appname]) for x in pathlist]
if multipath:
path = os.pathsep.join(pathlist)
else:
path = pathlist[0]
return path
if appname and version:
path = os.path.join(path, version)
return path
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific config dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user config directories are:
Mac OS X: same as user_data_dir
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
Win *: same as user_data_dir
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
That means, by default "~/.config/<AppName>".
"""
if system in ["win32", "darwin"]:
path = user_data_dir(appname, appauthor, None, roaming)
else:
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
r"""Return full path to the user-shared data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"multipath" is an optional parameter only applicable to *nix
which indicates that the entire list of config dirs should be
returned. By default, the first item from XDG_CONFIG_DIRS is
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
Typical site config directories are:
Mac OS X: same as site_data_dir
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
$XDG_CONFIG_DIRS
Win *: same as site_data_dir
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
"""
if system in ["win32", "darwin"]:
path = site_data_dir(appname, appauthor)
if appname and version:
path = os.path.join(path, version)
else:
# XDG default for $XDG_CONFIG_DIRS
# only first, if multipath is False
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
if appname:
if version:
appname = os.path.join(appname, version)
pathlist = [os.sep.join([x, appname]) for x in pathlist]
if multipath:
path = os.pathsep.join(pathlist)
else:
path = pathlist[0]
return path
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific cache dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Cache" to the base app data dir for Windows. See
discussion below.
Typical user cache directories are:
Mac OS X: ~/Library/Caches/<AppName>
Unix: ~/.cache/<AppName> (XDG default)
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
On Windows the only suggestion in the MSDN docs is that local settings go in
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
app data dir (the default returned by `user_data_dir` above). Apps typically
put cache data somewhere *under* the given dir here. Some examples:
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
...\Acme\SuperApp\Cache\1.0
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
This can be disabled with the `opinion=False` option.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
if opinion:
path = os.path.join(path, "Cache")
elif system == 'darwin':
path = os.path.expanduser('~/Library/Caches')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific state dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user state directories are:
Mac OS X: same as user_data_dir
Unix: ~/.local/state/<AppName> # or in $XDG_STATE_HOME, if defined
Win *: same as user_data_dir
For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
to extend the XDG spec and support $XDG_STATE_HOME.
That means, by default "~/.local/state/<AppName>".
"""
if system in ["win32", "darwin"]:
path = user_data_dir(appname, appauthor, None, roaming)
else:
path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific log dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Logs" to the base app data dir for Windows, and "log" to the
base cache dir for Unix. See discussion below.
Typical user log directories are:
Mac OS X: ~/Library/Logs/<AppName>
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
On Windows the only suggestion in the MSDN docs is that local settings
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
examples of what some windows apps use for a logs dir.)
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
value for Windows and appends "log" to the user cache dir for Unix.
This can be disabled with the `opinion=False` option.
"""
if system == "darwin":
path = os.path.join(
os.path.expanduser('~/Library/Logs'),
appname)
elif system == "win32":
path = user_data_dir(appname, appauthor, version)
version = False
if opinion:
path = os.path.join(path, "Logs")
else:
path = user_cache_dir(appname, appauthor, version)
version = False
if opinion:
path = os.path.join(path, "log")
if appname and version:
path = os.path.join(path, version)
return path
class AppDirs(object):
"""Convenience wrapper for getting application dirs."""
def __init__(self, appname=None, appauthor=None, version=None,
roaming=False, multipath=False):
self.appname = appname
self.appauthor = appauthor
self.version = version
self.roaming = roaming
self.multipath = multipath
@property
def user_data_dir(self):
return user_data_dir(self.appname, self.appauthor,
version=self.version, roaming=self.roaming)
@property
def site_data_dir(self):
return site_data_dir(self.appname, self.appauthor,
version=self.version, multipath=self.multipath)
@property
def user_config_dir(self):
return user_config_dir(self.appname, self.appauthor,
version=self.version, roaming=self.roaming)
@property
def site_config_dir(self):
return site_config_dir(self.appname, self.appauthor,
version=self.version, multipath=self.multipath)
@property
def user_cache_dir(self):
return user_cache_dir(self.appname, self.appauthor,
version=self.version)
@property
def user_state_dir(self):
return user_state_dir(self.appname, self.appauthor,
version=self.version)
@property
def user_log_dir(self):
return user_log_dir(self.appname, self.appauthor,
version=self.version)
#---- internal support stuff
def _get_win_folder_from_registry(csidl_name):
"""This is a fallback technique at best. I'm not sure if using the
registry for this guarantees us the correct answer for all CSIDL_*
names.
"""
if PY3:
import winreg as _winreg
else:
import _winreg
shell_folder_name = {
"CSIDL_APPDATA": "AppData",
"CSIDL_COMMON_APPDATA": "Common AppData",
"CSIDL_LOCAL_APPDATA": "Local AppData",
}[csidl_name]
key = _winreg.OpenKey(
_winreg.HKEY_CURRENT_USER,
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
)
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
return dir
def _get_win_folder_with_pywin32(csidl_name):
from win32com.shell import shellcon, shell
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
# Try to make this a unicode path because SHGetFolderPath does
# not return unicode strings when there is unicode data in the
# path.
try:
dir = unicode(dir)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
try:
import win32api
dir = win32api.GetShortPathName(dir)
except ImportError:
pass
except UnicodeError:
pass
return dir
def _get_win_folder_with_ctypes(csidl_name):
import ctypes
csidl_const = {
"CSIDL_APPDATA": 26,
"CSIDL_COMMON_APPDATA": 35,
"CSIDL_LOCAL_APPDATA": 28,
}[csidl_name]
buf = ctypes.create_unicode_buffer(1024)
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in buf:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf2 = ctypes.create_unicode_buffer(1024)
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
buf = buf2
return buf.value
def _get_win_folder_with_jna(csidl_name):
import array
from com.sun import jna
from com.sun.jna.platform import win32
buf_size = win32.WinDef.MAX_PATH * 2
buf = array.zeros('c', buf_size)
shell = win32.Shell32.INSTANCE
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf = array.zeros('c', buf_size)
kernel = win32.Kernel32.INSTANCE
if kernel.GetShortPathName(dir, buf, buf_size):
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
return dir
if system == "win32":
try:
import win32com.shell
_get_win_folder = _get_win_folder_with_pywin32
except ImportError:
try:
from ctypes import windll
_get_win_folder = _get_win_folder_with_ctypes
except ImportError:
try:
import com.sun.jna
_get_win_folder = _get_win_folder_with_jna
except ImportError:
_get_win_folder = _get_win_folder_from_registry
#---- self test code
if __name__ == "__main__":
appname = "MyApp"
appauthor = "MyCompany"
props = ("user_data_dir",
"user_config_dir",
"user_cache_dir",
"user_state_dir",
"user_log_dir",
"site_data_dir",
"site_config_dir")
print("-- app dirs %s --" % __version__)
print("-- app dirs (with optional 'version')")
dirs = AppDirs(appname, appauthor, version="1.0")
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (without optional 'version')")
dirs = AppDirs(appname, appauthor)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (without optional 'appauthor')")
dirs = AppDirs(appname)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (with disabled 'appauthor')")
dirs = AppDirs(appname, appauthor=False)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))

View file

@ -0,0 +1,36 @@
"""Read resources contained within a package."""
from ._common import (
as_file,
files,
Package,
)
from ._legacy import (
contents,
open_binary,
read_binary,
open_text,
read_text,
is_resource,
path,
Resource,
)
from .abc import ResourceReader
__all__ = [
'Package',
'Resource',
'ResourceReader',
'as_file',
'contents',
'files',
'is_resource',
'open_binary',
'open_text',
'path',
'read_binary',
'read_text',
]

View file

@ -0,0 +1,170 @@
from contextlib import suppress
from io import TextIOWrapper
from . import abc
class SpecLoaderAdapter:
"""
Adapt a package spec to adapt the underlying loader.
"""
def __init__(self, spec, adapter=lambda spec: spec.loader):
self.spec = spec
self.loader = adapter(spec)
def __getattr__(self, name):
return getattr(self.spec, name)
class TraversableResourcesLoader:
"""
Adapt a loader to provide TraversableResources.
"""
def __init__(self, spec):
self.spec = spec
def get_resource_reader(self, name):
return CompatibilityFiles(self.spec)._native()
def _io_wrapper(file, mode='r', *args, **kwargs):
if mode == 'r':
return TextIOWrapper(file, *args, **kwargs)
elif mode == 'rb':
return file
raise ValueError(
"Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode)
)
class CompatibilityFiles:
"""
Adapter for an existing or non-existent resource reader
to provide a compatibility .files().
"""
class SpecPath(abc.Traversable):
"""
Path tied to a module spec.
Can be read and exposes the resource reader children.
"""
def __init__(self, spec, reader):
self._spec = spec
self._reader = reader
def iterdir(self):
if not self._reader:
return iter(())
return iter(
CompatibilityFiles.ChildPath(self._reader, path)
for path in self._reader.contents()
)
def is_file(self):
return False
is_dir = is_file
def joinpath(self, other):
if not self._reader:
return CompatibilityFiles.OrphanPath(other)
return CompatibilityFiles.ChildPath(self._reader, other)
@property
def name(self):
return self._spec.name
def open(self, mode='r', *args, **kwargs):
return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs)
class ChildPath(abc.Traversable):
"""
Path tied to a resource reader child.
Can be read but doesn't expose any meaningful children.
"""
def __init__(self, reader, name):
self._reader = reader
self._name = name
def iterdir(self):
return iter(())
def is_file(self):
return self._reader.is_resource(self.name)
def is_dir(self):
return not self.is_file()
def joinpath(self, other):
return CompatibilityFiles.OrphanPath(self.name, other)
@property
def name(self):
return self._name
def open(self, mode='r', *args, **kwargs):
return _io_wrapper(
self._reader.open_resource(self.name), mode, *args, **kwargs
)
class OrphanPath(abc.Traversable):
"""
Orphan path, not tied to a module spec or resource reader.
Can't be read and doesn't expose any meaningful children.
"""
def __init__(self, *path_parts):
if len(path_parts) < 1:
raise ValueError('Need at least one path part to construct a path')
self._path = path_parts
def iterdir(self):
return iter(())
def is_file(self):
return False
is_dir = is_file
def joinpath(self, other):
return CompatibilityFiles.OrphanPath(*self._path, other)
@property
def name(self):
return self._path[-1]
def open(self, mode='r', *args, **kwargs):
raise FileNotFoundError("Can't open orphan path")
def __init__(self, spec):
self.spec = spec
@property
def _reader(self):
with suppress(AttributeError):
return self.spec.loader.get_resource_reader(self.spec.name)
def _native(self):
"""
Return the native reader if it supports files().
"""
reader = self._reader
return reader if hasattr(reader, 'files') else self
def __getattr__(self, attr):
return getattr(self._reader, attr)
def files(self):
return CompatibilityFiles.SpecPath(self.spec, self._reader)
def wrap_spec(package):
"""
Construct a package spec with traversable compatibility
on the spec/loader/reader.
"""
return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader)

View file

@ -0,0 +1,104 @@
import os
import pathlib
import tempfile
import functools
import contextlib
import types
import importlib
from typing import Union, Optional
from .abc import ResourceReader, Traversable
from ._compat import wrap_spec
Package = Union[types.ModuleType, str]
def files(package):
# type: (Package) -> Traversable
"""
Get a Traversable resource from a package
"""
return from_package(get_package(package))
def get_resource_reader(package):
# type: (types.ModuleType) -> Optional[ResourceReader]
"""
Return the package's loader if it's a ResourceReader.
"""
# We can't use
# a issubclass() check here because apparently abc.'s __subclasscheck__()
# hook wants to create a weak reference to the object, but
# zipimport.zipimporter does not support weak references, resulting in a
# TypeError. That seems terrible.
spec = package.__spec__
reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore
if reader is None:
return None
return reader(spec.name) # type: ignore
def resolve(cand):
# type: (Package) -> types.ModuleType
return cand if isinstance(cand, types.ModuleType) else importlib.import_module(cand)
def get_package(package):
# type: (Package) -> types.ModuleType
"""Take a package name or module object and return the module.
Raise an exception if the resolved module is not a package.
"""
resolved = resolve(package)
if wrap_spec(resolved).submodule_search_locations is None:
raise TypeError(f'{package!r} is not a package')
return resolved
def from_package(package):
"""
Return a Traversable object for the given package.
"""
spec = wrap_spec(package)
reader = spec.loader.get_resource_reader(spec.name)
return reader.files()
@contextlib.contextmanager
def _tempfile(reader, suffix=''):
# Not using tempfile.NamedTemporaryFile as it leads to deeper 'try'
# blocks due to the need to close the temporary file to work on Windows
# properly.
fd, raw_path = tempfile.mkstemp(suffix=suffix)
try:
try:
os.write(fd, reader())
finally:
os.close(fd)
del reader
yield pathlib.Path(raw_path)
finally:
try:
os.remove(raw_path)
except FileNotFoundError:
pass
@functools.singledispatch
def as_file(path):
"""
Given a Traversable object, return that object as a
path on the local file system in a context manager.
"""
return _tempfile(path.read_bytes, suffix=path.name)
@as_file.register(pathlib.Path)
@contextlib.contextmanager
def _(path):
"""
Degenerate behavior for pathlib.Path objects.
"""
yield path

View file

@ -0,0 +1,98 @@
# flake8: noqa
import abc
import sys
import pathlib
from contextlib import suppress
if sys.version_info >= (3, 10):
from zipfile import Path as ZipPath # type: ignore
else:
from ..zipp import Path as ZipPath # type: ignore
try:
from typing import runtime_checkable # type: ignore
except ImportError:
def runtime_checkable(cls): # type: ignore
return cls
try:
from typing import Protocol # type: ignore
except ImportError:
Protocol = abc.ABC # type: ignore
class TraversableResourcesLoader:
"""
Adapt loaders to provide TraversableResources and other
compatibility.
Used primarily for Python 3.9 and earlier where the native
loaders do not yet implement TraversableResources.
"""
def __init__(self, spec):
self.spec = spec
@property
def path(self):
return self.spec.origin
def get_resource_reader(self, name):
from . import readers, _adapters
def _zip_reader(spec):
with suppress(AttributeError):
return readers.ZipReader(spec.loader, spec.name)
def _namespace_reader(spec):
with suppress(AttributeError, ValueError):
return readers.NamespaceReader(spec.submodule_search_locations)
def _available_reader(spec):
with suppress(AttributeError):
return spec.loader.get_resource_reader(spec.name)
def _native_reader(spec):
reader = _available_reader(spec)
return reader if hasattr(reader, 'files') else None
def _file_reader(spec):
try:
path = pathlib.Path(self.path)
except TypeError:
return None
if path.exists():
return readers.FileReader(self)
return (
# native reader if it supplies 'files'
_native_reader(self.spec)
or
# local ZipReader if a zip module
_zip_reader(self.spec)
or
# local NamespaceReader if a namespace module
_namespace_reader(self.spec)
or
# local FileReader
_file_reader(self.spec)
# fallback - adapt the spec ResourceReader to TraversableReader
or _adapters.CompatibilityFiles(self.spec)
)
def wrap_spec(package):
"""
Construct a package spec with traversable compatibility
on the spec/loader/reader.
Supersedes _adapters.wrap_spec to use TraversableResourcesLoader
from above for older Python compatibility (<3.10).
"""
from . import _adapters
return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader)

View file

@ -0,0 +1,35 @@
from itertools import filterfalse
from typing import (
Callable,
Iterable,
Iterator,
Optional,
Set,
TypeVar,
Union,
)
# Type and type variable definitions
_T = TypeVar('_T')
_U = TypeVar('_U')
def unique_everseen(
iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None
) -> Iterator[_T]:
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen: Set[Union[_T, _U]] = set()
seen_add = seen.add
if key is None:
for element in filterfalse(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element

View file

@ -0,0 +1,121 @@
import functools
import os
import pathlib
import types
import warnings
from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any
from . import _common
Package = Union[types.ModuleType, str]
Resource = str
def deprecated(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
warnings.warn(
f"{func.__name__} is deprecated. Use files() instead. "
"Refer to https://importlib-resources.readthedocs.io"
"/en/latest/using.html#migrating-from-legacy for migration advice.",
DeprecationWarning,
stacklevel=2,
)
return func(*args, **kwargs)
return wrapper
def normalize_path(path):
# type: (Any) -> str
"""Normalize a path by ensuring it is a string.
If the resulting string contains path separators, an exception is raised.
"""
str_path = str(path)
parent, file_name = os.path.split(str_path)
if parent:
raise ValueError(f'{path!r} must be only a file name')
return file_name
@deprecated
def open_binary(package: Package, resource: Resource) -> BinaryIO:
"""Return a file-like object opened for binary reading of the resource."""
return (_common.files(package) / normalize_path(resource)).open('rb')
@deprecated
def read_binary(package: Package, resource: Resource) -> bytes:
"""Return the binary contents of the resource."""
return (_common.files(package) / normalize_path(resource)).read_bytes()
@deprecated
def open_text(
package: Package,
resource: Resource,
encoding: str = 'utf-8',
errors: str = 'strict',
) -> TextIO:
"""Return a file-like object opened for text reading of the resource."""
return (_common.files(package) / normalize_path(resource)).open(
'r', encoding=encoding, errors=errors
)
@deprecated
def read_text(
package: Package,
resource: Resource,
encoding: str = 'utf-8',
errors: str = 'strict',
) -> str:
"""Return the decoded string of the resource.
The decoding-related arguments have the same semantics as those of
bytes.decode().
"""
with open_text(package, resource, encoding, errors) as fp:
return fp.read()
@deprecated
def contents(package: Package) -> Iterable[str]:
"""Return an iterable of entries in `package`.
Note that not all entries are resources. Specifically, directories are
not considered resources. Use `is_resource()` on each entry returned here
to check if it is a resource or not.
"""
return [path.name for path in _common.files(package).iterdir()]
@deprecated
def is_resource(package: Package, name: str) -> bool:
"""True if `name` is a resource inside `package`.
Directories are *not* resources.
"""
resource = normalize_path(name)
return any(
traversable.name == resource and traversable.is_file()
for traversable in _common.files(package).iterdir()
)
@deprecated
def path(
package: Package,
resource: Resource,
) -> ContextManager[pathlib.Path]:
"""A context manager providing a file path object to the resource.
If the resource does not already exist on its own on the file system,
a temporary file will be created. If the file was created, the file
will be deleted upon exiting the context manager (no exception is
raised if the file was deleted prior to the context manager
exiting).
"""
return _common.as_file(_common.files(package) / normalize_path(resource))

View file

@ -0,0 +1,137 @@
import abc
from typing import BinaryIO, Iterable, Text
from ._compat import runtime_checkable, Protocol
class ResourceReader(metaclass=abc.ABCMeta):
"""Abstract base class for loaders to provide resource reading support."""
@abc.abstractmethod
def open_resource(self, resource: Text) -> BinaryIO:
"""Return an opened, file-like object for binary reading.
The 'resource' argument is expected to represent only a file name.
If the resource cannot be found, FileNotFoundError is raised.
"""
# This deliberately raises FileNotFoundError instead of
# NotImplementedError so that if this method is accidentally called,
# it'll still do the right thing.
raise FileNotFoundError
@abc.abstractmethod
def resource_path(self, resource: Text) -> Text:
"""Return the file system path to the specified resource.
The 'resource' argument is expected to represent only a file name.
If the resource does not exist on the file system, raise
FileNotFoundError.
"""
# This deliberately raises FileNotFoundError instead of
# NotImplementedError so that if this method is accidentally called,
# it'll still do the right thing.
raise FileNotFoundError
@abc.abstractmethod
def is_resource(self, path: Text) -> bool:
"""Return True if the named 'path' is a resource.
Files are resources, directories are not.
"""
raise FileNotFoundError
@abc.abstractmethod
def contents(self) -> Iterable[str]:
"""Return an iterable of entries in `package`."""
raise FileNotFoundError
@runtime_checkable
class Traversable(Protocol):
"""
An object with a subset of pathlib.Path methods suitable for
traversing directories and opening files.
"""
@abc.abstractmethod
def iterdir(self):
"""
Yield Traversable objects in self
"""
def read_bytes(self):
"""
Read contents of self as bytes
"""
with self.open('rb') as strm:
return strm.read()
def read_text(self, encoding=None):
"""
Read contents of self as text
"""
with self.open(encoding=encoding) as strm:
return strm.read()
@abc.abstractmethod
def is_dir(self) -> bool:
"""
Return True if self is a directory
"""
@abc.abstractmethod
def is_file(self) -> bool:
"""
Return True if self is a file
"""
@abc.abstractmethod
def joinpath(self, child):
"""
Return Traversable child in self
"""
def __truediv__(self, child):
"""
Return Traversable child in self
"""
return self.joinpath(child)
@abc.abstractmethod
def open(self, mode='r', *args, **kwargs):
"""
mode may be 'r' or 'rb' to open as text or binary. Return a handle
suitable for reading (same as pathlib.Path.open).
When opening as text, accepts encoding parameters such as those
accepted by io.TextIOWrapper.
"""
@abc.abstractproperty
def name(self) -> str:
"""
The base name of this object without any parent references.
"""
class TraversableResources(ResourceReader):
"""
The required interface for providing traversable
resources.
"""
@abc.abstractmethod
def files(self):
"""Return a Traversable object for the loaded package."""
def open_resource(self, resource):
return self.files().joinpath(resource).open('rb')
def resource_path(self, resource):
raise FileNotFoundError(resource)
def is_resource(self, path):
return self.files().joinpath(path).is_file()
def contents(self):
return (item.name for item in self.files().iterdir())

View file

@ -0,0 +1,122 @@
import collections
import pathlib
import operator
from . import abc
from ._itertools import unique_everseen
from ._compat import ZipPath
def remove_duplicates(items):
return iter(collections.OrderedDict.fromkeys(items))
class FileReader(abc.TraversableResources):
def __init__(self, loader):
self.path = pathlib.Path(loader.path).parent
def resource_path(self, resource):
"""
Return the file system path to prevent
`resources.path()` from creating a temporary
copy.
"""
return str(self.path.joinpath(resource))
def files(self):
return self.path
class ZipReader(abc.TraversableResources):
def __init__(self, loader, module):
_, _, name = module.rpartition('.')
self.prefix = loader.prefix.replace('\\', '/') + name + '/'
self.archive = loader.archive
def open_resource(self, resource):
try:
return super().open_resource(resource)
except KeyError as exc:
raise FileNotFoundError(exc.args[0])
def is_resource(self, path):
# workaround for `zipfile.Path.is_file` returning true
# for non-existent paths.
target = self.files().joinpath(path)
return target.is_file() and target.exists()
def files(self):
return ZipPath(self.archive, self.prefix)
class MultiplexedPath(abc.Traversable):
"""
Given a series of Traversable objects, implement a merged
version of the interface across all objects. Useful for
namespace packages which may be multihomed at a single
name.
"""
def __init__(self, *paths):
self._paths = list(map(pathlib.Path, remove_duplicates(paths)))
if not self._paths:
message = 'MultiplexedPath must contain at least one path'
raise FileNotFoundError(message)
if not all(path.is_dir() for path in self._paths):
raise NotADirectoryError('MultiplexedPath only supports directories')
def iterdir(self):
files = (file for path in self._paths for file in path.iterdir())
return unique_everseen(files, key=operator.attrgetter('name'))
def read_bytes(self):
raise FileNotFoundError(f'{self} is not a file')
def read_text(self, *args, **kwargs):
raise FileNotFoundError(f'{self} is not a file')
def is_dir(self):
return True
def is_file(self):
return False
def joinpath(self, child):
# first try to find child in current paths
for file in self.iterdir():
if file.name == child:
return file
# if it does not exist, construct it with the first path
return self._paths[0] / child
__truediv__ = joinpath
def open(self, *args, **kwargs):
raise FileNotFoundError(f'{self} is not a file')
@property
def name(self):
return self._paths[0].name
def __repr__(self):
paths = ', '.join(f"'{path}'" for path in self._paths)
return f'MultiplexedPath({paths})'
class NamespaceReader(abc.TraversableResources):
def __init__(self, namespace_path):
if 'NamespacePath' not in str(namespace_path):
raise ValueError('Invalid path')
self.path = MultiplexedPath(*list(namespace_path))
def resource_path(self, resource):
"""
Return the file system path to prevent
`resources.path()` from creating a temporary
copy.
"""
return str(self.path.joinpath(resource))
def files(self):
return self.path

View file

@ -0,0 +1,116 @@
"""
Interface adapters for low-level readers.
"""
import abc
import io
import itertools
from typing import BinaryIO, List
from .abc import Traversable, TraversableResources
class SimpleReader(abc.ABC):
"""
The minimum, low-level interface required from a resource
provider.
"""
@abc.abstractproperty
def package(self):
# type: () -> str
"""
The name of the package for which this reader loads resources.
"""
@abc.abstractmethod
def children(self):
# type: () -> List['SimpleReader']
"""
Obtain an iterable of SimpleReader for available
child containers (e.g. directories).
"""
@abc.abstractmethod
def resources(self):
# type: () -> List[str]
"""
Obtain available named resources for this virtual package.
"""
@abc.abstractmethod
def open_binary(self, resource):
# type: (str) -> BinaryIO
"""
Obtain a File-like for a named resource.
"""
@property
def name(self):
return self.package.split('.')[-1]
class ResourceHandle(Traversable):
"""
Handle to a named resource in a ResourceReader.
"""
def __init__(self, parent, name):
# type: (ResourceContainer, str) -> None
self.parent = parent
self.name = name # type: ignore
def is_file(self):
return True
def is_dir(self):
return False
def open(self, mode='r', *args, **kwargs):
stream = self.parent.reader.open_binary(self.name)
if 'b' not in mode:
stream = io.TextIOWrapper(*args, **kwargs)
return stream
def joinpath(self, name):
raise RuntimeError("Cannot traverse into a resource")
class ResourceContainer(Traversable):
"""
Traversable container for a package's resources via its reader.
"""
def __init__(self, reader):
# type: (SimpleReader) -> None
self.reader = reader
def is_dir(self):
return True
def is_file(self):
return False
def iterdir(self):
files = (ResourceHandle(self, name) for name in self.reader.resources)
dirs = map(ResourceContainer, self.reader.children())
return itertools.chain(files, dirs)
def open(self, *args, **kwargs):
raise IsADirectoryError()
def joinpath(self, name):
return next(
traversable for traversable in self.iterdir() if traversable.name == name
)
class TraversableReader(TraversableResources, SimpleReader):
"""
A TraversableResources based on SimpleReader. Resource providers
may derive from this class to provide the TraversableResources
interface by supplying the SimpleReader interface.
"""
def files(self):
return ResourceContainer(self)

View file

@ -0,0 +1,213 @@
import os
import subprocess
import contextlib
import functools
import tempfile
import shutil
import operator
@contextlib.contextmanager
def pushd(dir):
orig = os.getcwd()
os.chdir(dir)
try:
yield dir
finally:
os.chdir(orig)
@contextlib.contextmanager
def tarball_context(url, target_dir=None, runner=None, pushd=pushd):
"""
Get a tarball, extract it, change to that directory, yield, then
clean up.
`runner` is the function to invoke commands.
`pushd` is a context manager for changing the directory.
"""
if target_dir is None:
target_dir = os.path.basename(url).replace('.tar.gz', '').replace('.tgz', '')
if runner is None:
runner = functools.partial(subprocess.check_call, shell=True)
# In the tar command, use --strip-components=1 to strip the first path and
# then
# use -C to cause the files to be extracted to {target_dir}. This ensures
# that we always know where the files were extracted.
runner('mkdir {target_dir}'.format(**vars()))
try:
getter = 'wget {url} -O -'
extract = 'tar x{compression} --strip-components=1 -C {target_dir}'
cmd = ' | '.join((getter, extract))
runner(cmd.format(compression=infer_compression(url), **vars()))
with pushd(target_dir):
yield target_dir
finally:
runner('rm -Rf {target_dir}'.format(**vars()))
def infer_compression(url):
"""
Given a URL or filename, infer the compression code for tar.
"""
# cheat and just assume it's the last two characters
compression_indicator = url[-2:]
mapping = dict(gz='z', bz='j', xz='J')
# Assume 'z' (gzip) if no match
return mapping.get(compression_indicator, 'z')
@contextlib.contextmanager
def temp_dir(remover=shutil.rmtree):
"""
Create a temporary directory context. Pass a custom remover
to override the removal behavior.
"""
temp_dir = tempfile.mkdtemp()
try:
yield temp_dir
finally:
remover(temp_dir)
@contextlib.contextmanager
def repo_context(url, branch=None, quiet=True, dest_ctx=temp_dir):
"""
Check out the repo indicated by url.
If dest_ctx is supplied, it should be a context manager
to yield the target directory for the check out.
"""
exe = 'git' if 'git' in url else 'hg'
with dest_ctx() as repo_dir:
cmd = [exe, 'clone', url, repo_dir]
if branch:
cmd.extend(['--branch', branch])
devnull = open(os.path.devnull, 'w')
stdout = devnull if quiet else None
subprocess.check_call(cmd, stdout=stdout)
yield repo_dir
@contextlib.contextmanager
def null():
yield
class ExceptionTrap:
"""
A context manager that will catch certain exceptions and provide an
indication they occurred.
>>> with ExceptionTrap() as trap:
... raise Exception()
>>> bool(trap)
True
>>> with ExceptionTrap() as trap:
... pass
>>> bool(trap)
False
>>> with ExceptionTrap(ValueError) as trap:
... raise ValueError("1 + 1 is not 3")
>>> bool(trap)
True
>>> with ExceptionTrap(ValueError) as trap:
... raise Exception()
Traceback (most recent call last):
...
Exception
>>> bool(trap)
False
"""
exc_info = None, None, None
def __init__(self, exceptions=(Exception,)):
self.exceptions = exceptions
def __enter__(self):
return self
@property
def type(self):
return self.exc_info[0]
@property
def value(self):
return self.exc_info[1]
@property
def tb(self):
return self.exc_info[2]
def __exit__(self, *exc_info):
type = exc_info[0]
matches = type and issubclass(type, self.exceptions)
if matches:
self.exc_info = exc_info
return matches
def __bool__(self):
return bool(self.type)
def raises(self, func, *, _test=bool):
"""
Wrap func and replace the result with the truth
value of the trap (True if an exception occurred).
First, give the decorator an alias to support Python 3.8
Syntax.
>>> raises = ExceptionTrap(ValueError).raises
Now decorate a function that always fails.
>>> @raises
... def fail():
... raise ValueError('failed')
>>> fail()
True
"""
@functools.wraps(func)
def wrapper(*args, **kwargs):
with ExceptionTrap(self.exceptions) as trap:
func(*args, **kwargs)
return _test(trap)
return wrapper
def passes(self, func):
"""
Wrap func and replace the result with the truth
value of the trap (True if no exception).
First, give the decorator an alias to support Python 3.8
Syntax.
>>> passes = ExceptionTrap(ValueError).passes
Now decorate a function that always fails.
>>> @passes
... def fail():
... raise ValueError('failed')
>>> fail()
False
"""
return self.raises(func, _test=operator.not_)
class suppress(contextlib.suppress, contextlib.ContextDecorator):
"""
A version of contextlib.suppress with decorator support.
>>> @suppress(KeyError)
... def key_error():
... {}['']
>>> key_error()
"""

View file

@ -0,0 +1,525 @@
import functools
import time
import inspect
import collections
import types
import itertools
import pkg_resources.extern.more_itertools
from typing import Callable, TypeVar
CallableT = TypeVar("CallableT", bound=Callable[..., object])
def compose(*funcs):
"""
Compose any number of unary functions into a single unary function.
>>> import textwrap
>>> expected = str.strip(textwrap.dedent(compose.__doc__))
>>> strip_and_dedent = compose(str.strip, textwrap.dedent)
>>> strip_and_dedent(compose.__doc__) == expected
True
Compose also allows the innermost function to take arbitrary arguments.
>>> round_three = lambda x: round(x, ndigits=3)
>>> f = compose(round_three, int.__truediv__)
>>> [f(3*x, x+1) for x in range(1,10)]
[1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7]
"""
def compose_two(f1, f2):
return lambda *args, **kwargs: f1(f2(*args, **kwargs))
return functools.reduce(compose_two, funcs)
def method_caller(method_name, *args, **kwargs):
"""
Return a function that will call a named method on the
target object with optional positional and keyword
arguments.
>>> lower = method_caller('lower')
>>> lower('MyString')
'mystring'
"""
def call_method(target):
func = getattr(target, method_name)
return func(*args, **kwargs)
return call_method
def once(func):
"""
Decorate func so it's only ever called the first time.
This decorator can ensure that an expensive or non-idempotent function
will not be expensive on subsequent calls and is idempotent.
>>> add_three = once(lambda a: a+3)
>>> add_three(3)
6
>>> add_three(9)
6
>>> add_three('12')
6
To reset the stored value, simply clear the property ``saved_result``.
>>> del add_three.saved_result
>>> add_three(9)
12
>>> add_three(8)
12
Or invoke 'reset()' on it.
>>> add_three.reset()
>>> add_three(-3)
0
>>> add_three(0)
0
"""
@functools.wraps(func)
def wrapper(*args, **kwargs):
if not hasattr(wrapper, 'saved_result'):
wrapper.saved_result = func(*args, **kwargs)
return wrapper.saved_result
wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result')
return wrapper
def method_cache(
method: CallableT,
cache_wrapper: Callable[
[CallableT], CallableT
] = functools.lru_cache(), # type: ignore[assignment]
) -> CallableT:
"""
Wrap lru_cache to support storing the cache data in the object instances.
Abstracts the common paradigm where the method explicitly saves an
underscore-prefixed protected property on first call and returns that
subsequently.
>>> class MyClass:
... calls = 0
...
... @method_cache
... def method(self, value):
... self.calls += 1
... return value
>>> a = MyClass()
>>> a.method(3)
3
>>> for x in range(75):
... res = a.method(x)
>>> a.calls
75
Note that the apparent behavior will be exactly like that of lru_cache
except that the cache is stored on each instance, so values in one
instance will not flush values from another, and when an instance is
deleted, so are the cached values for that instance.
>>> b = MyClass()
>>> for x in range(35):
... res = b.method(x)
>>> b.calls
35
>>> a.method(0)
0
>>> a.calls
75
Note that if method had been decorated with ``functools.lru_cache()``,
a.calls would have been 76 (due to the cached value of 0 having been
flushed by the 'b' instance).
Clear the cache with ``.cache_clear()``
>>> a.method.cache_clear()
Same for a method that hasn't yet been called.
>>> c = MyClass()
>>> c.method.cache_clear()
Another cache wrapper may be supplied:
>>> cache = functools.lru_cache(maxsize=2)
>>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
>>> a = MyClass()
>>> a.method2()
3
Caution - do not subsequently wrap the method with another decorator, such
as ``@property``, which changes the semantics of the function.
See also
http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
for another implementation and additional justification.
"""
def wrapper(self: object, *args: object, **kwargs: object) -> object:
# it's the first call, replace the method with a cached, bound method
bound_method: CallableT = types.MethodType( # type: ignore[assignment]
method, self
)
cached_method = cache_wrapper(bound_method)
setattr(self, method.__name__, cached_method)
return cached_method(*args, **kwargs)
# Support cache clear even before cache has been created.
wrapper.cache_clear = lambda: None # type: ignore[attr-defined]
return ( # type: ignore[return-value]
_special_method_cache(method, cache_wrapper) or wrapper
)
def _special_method_cache(method, cache_wrapper):
"""
Because Python treats special methods differently, it's not
possible to use instance attributes to implement the cached
methods.
Instead, install the wrapper method under a different name
and return a simple proxy to that wrapper.
https://github.com/jaraco/jaraco.functools/issues/5
"""
name = method.__name__
special_names = '__getattr__', '__getitem__'
if name not in special_names:
return
wrapper_name = '__cached' + name
def proxy(self, *args, **kwargs):
if wrapper_name not in vars(self):
bound = types.MethodType(method, self)
cache = cache_wrapper(bound)
setattr(self, wrapper_name, cache)
else:
cache = getattr(self, wrapper_name)
return cache(*args, **kwargs)
return proxy
def apply(transform):
"""
Decorate a function with a transform function that is
invoked on results returned from the decorated function.
>>> @apply(reversed)
... def get_numbers(start):
... "doc for get_numbers"
... return range(start, start+3)
>>> list(get_numbers(4))
[6, 5, 4]
>>> get_numbers.__doc__
'doc for get_numbers'
"""
def wrap(func):
return functools.wraps(func)(compose(transform, func))
return wrap
def result_invoke(action):
r"""
Decorate a function with an action function that is
invoked on the results returned from the decorated
function (for its side-effect), then return the original
result.
>>> @result_invoke(print)
... def add_two(a, b):
... return a + b
>>> x = add_two(2, 3)
5
>>> x
5
"""
def wrap(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
action(result)
return result
return wrapper
return wrap
def call_aside(f, *args, **kwargs):
"""
Call a function for its side effect after initialization.
>>> @call_aside
... def func(): print("called")
called
>>> func()
called
Use functools.partial to pass parameters to the initial call
>>> @functools.partial(call_aside, name='bingo')
... def func(name): print("called with", name)
called with bingo
"""
f(*args, **kwargs)
return f
class Throttler:
"""
Rate-limit a function (or other callable)
"""
def __init__(self, func, max_rate=float('Inf')):
if isinstance(func, Throttler):
func = func.func
self.func = func
self.max_rate = max_rate
self.reset()
def reset(self):
self.last_called = 0
def __call__(self, *args, **kwargs):
self._wait()
return self.func(*args, **kwargs)
def _wait(self):
"ensure at least 1/max_rate seconds from last call"
elapsed = time.time() - self.last_called
must_wait = 1 / self.max_rate - elapsed
time.sleep(max(0, must_wait))
self.last_called = time.time()
def __get__(self, obj, type=None):
return first_invoke(self._wait, functools.partial(self.func, obj))
def first_invoke(func1, func2):
"""
Return a function that when invoked will invoke func1 without
any parameters (for its side-effect) and then invoke func2
with whatever parameters were passed, returning its result.
"""
def wrapper(*args, **kwargs):
func1()
return func2(*args, **kwargs)
return wrapper
def retry_call(func, cleanup=lambda: None, retries=0, trap=()):
"""
Given a callable func, trap the indicated exceptions
for up to 'retries' times, invoking cleanup on the
exception. On the final attempt, allow any exceptions
to propagate.
"""
attempts = itertools.count() if retries == float('inf') else range(retries)
for attempt in attempts:
try:
return func()
except trap:
cleanup()
return func()
def retry(*r_args, **r_kwargs):
"""
Decorator wrapper for retry_call. Accepts arguments to retry_call
except func and then returns a decorator for the decorated function.
Ex:
>>> @retry(retries=3)
... def my_func(a, b):
... "this is my funk"
... print(a, b)
>>> my_func.__doc__
'this is my funk'
"""
def decorate(func):
@functools.wraps(func)
def wrapper(*f_args, **f_kwargs):
bound = functools.partial(func, *f_args, **f_kwargs)
return retry_call(bound, *r_args, **r_kwargs)
return wrapper
return decorate
def print_yielded(func):
"""
Convert a generator into a function that prints all yielded elements
>>> @print_yielded
... def x():
... yield 3; yield None
>>> x()
3
None
"""
print_all = functools.partial(map, print)
print_results = compose(more_itertools.consume, print_all, func)
return functools.wraps(func)(print_results)
def pass_none(func):
"""
Wrap func so it's not called if its first param is None
>>> print_text = pass_none(print)
>>> print_text('text')
text
>>> print_text(None)
"""
@functools.wraps(func)
def wrapper(param, *args, **kwargs):
if param is not None:
return func(param, *args, **kwargs)
return wrapper
def assign_params(func, namespace):
"""
Assign parameters from namespace where func solicits.
>>> def func(x, y=3):
... print(x, y)
>>> assigned = assign_params(func, dict(x=2, z=4))
>>> assigned()
2 3
The usual errors are raised if a function doesn't receive
its required parameters:
>>> assigned = assign_params(func, dict(y=3, z=4))
>>> assigned()
Traceback (most recent call last):
TypeError: func() ...argument...
It even works on methods:
>>> class Handler:
... def meth(self, arg):
... print(arg)
>>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))()
crystal
"""
sig = inspect.signature(func)
params = sig.parameters.keys()
call_ns = {k: namespace[k] for k in params if k in namespace}
return functools.partial(func, **call_ns)
def save_method_args(method):
"""
Wrap a method such that when it is called, the args and kwargs are
saved on the method.
>>> class MyClass:
... @save_method_args
... def method(self, a, b):
... print(a, b)
>>> my_ob = MyClass()
>>> my_ob.method(1, 2)
1 2
>>> my_ob._saved_method.args
(1, 2)
>>> my_ob._saved_method.kwargs
{}
>>> my_ob.method(a=3, b='foo')
3 foo
>>> my_ob._saved_method.args
()
>>> my_ob._saved_method.kwargs == dict(a=3, b='foo')
True
The arguments are stored on the instance, allowing for
different instance to save different args.
>>> your_ob = MyClass()
>>> your_ob.method({str('x'): 3}, b=[4])
{'x': 3} [4]
>>> your_ob._saved_method.args
({'x': 3},)
>>> my_ob._saved_method.args
()
"""
args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs')
@functools.wraps(method)
def wrapper(self, *args, **kwargs):
attr_name = '_saved_' + method.__name__
attr = args_and_kwargs(args, kwargs)
setattr(self, attr_name, attr)
return method(self, *args, **kwargs)
return wrapper
def except_(*exceptions, replace=None, use=None):
"""
Replace the indicated exceptions, if raised, with the indicated
literal replacement or evaluated expression (if present).
>>> safe_int = except_(ValueError)(int)
>>> safe_int('five')
>>> safe_int('5')
5
Specify a literal replacement with ``replace``.
>>> safe_int_r = except_(ValueError, replace=0)(int)
>>> safe_int_r('five')
0
Provide an expression to ``use`` to pass through particular parameters.
>>> safe_int_pt = except_(ValueError, use='args[0]')(int)
>>> safe_int_pt('five')
'five'
"""
def decorate(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except exceptions:
try:
return eval(use)
except TypeError:
return replace
return wrapper
return decorate

View file

@ -0,0 +1,599 @@
import re
import itertools
import textwrap
import functools
try:
from importlib.resources import files # type: ignore
except ImportError: # pragma: nocover
from pkg_resources.extern.importlib_resources import files # type: ignore
from pkg_resources.extern.jaraco.functools import compose, method_cache
from pkg_resources.extern.jaraco.context import ExceptionTrap
def substitution(old, new):
"""
Return a function that will perform a substitution on a string
"""
return lambda s: s.replace(old, new)
def multi_substitution(*substitutions):
"""
Take a sequence of pairs specifying substitutions, and create
a function that performs those substitutions.
>>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
'baz'
"""
substitutions = itertools.starmap(substitution, substitutions)
# compose function applies last function first, so reverse the
# substitutions to get the expected order.
substitutions = reversed(tuple(substitutions))
return compose(*substitutions)
class FoldedCase(str):
"""
A case insensitive string class; behaves just like str
except compares equal when the only variation is case.
>>> s = FoldedCase('hello world')
>>> s == 'Hello World'
True
>>> 'Hello World' == s
True
>>> s != 'Hello World'
False
>>> s.index('O')
4
>>> s.split('O')
['hell', ' w', 'rld']
>>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
['alpha', 'Beta', 'GAMMA']
Sequence membership is straightforward.
>>> "Hello World" in [s]
True
>>> s in ["Hello World"]
True
You may test for set inclusion, but candidate and elements
must both be folded.
>>> FoldedCase("Hello World") in {s}
True
>>> s in {FoldedCase("Hello World")}
True
String inclusion works as long as the FoldedCase object
is on the right.
>>> "hello" in FoldedCase("Hello World")
True
But not if the FoldedCase object is on the left:
>>> FoldedCase('hello') in 'Hello World'
False
In that case, use ``in_``:
>>> FoldedCase('hello').in_('Hello World')
True
>>> FoldedCase('hello') > FoldedCase('Hello')
False
"""
def __lt__(self, other):
return self.lower() < other.lower()
def __gt__(self, other):
return self.lower() > other.lower()
def __eq__(self, other):
return self.lower() == other.lower()
def __ne__(self, other):
return self.lower() != other.lower()
def __hash__(self):
return hash(self.lower())
def __contains__(self, other):
return super().lower().__contains__(other.lower())
def in_(self, other):
"Does self appear in other?"
return self in FoldedCase(other)
# cache lower since it's likely to be called frequently.
@method_cache
def lower(self):
return super().lower()
def index(self, sub):
return self.lower().index(sub.lower())
def split(self, splitter=' ', maxsplit=0):
pattern = re.compile(re.escape(splitter), re.I)
return pattern.split(self, maxsplit)
# Python 3.8 compatibility
_unicode_trap = ExceptionTrap(UnicodeDecodeError)
@_unicode_trap.passes
def is_decodable(value):
r"""
Return True if the supplied value is decodable (using the default
encoding).
>>> is_decodable(b'\xff')
False
>>> is_decodable(b'\x32')
True
"""
value.decode()
def is_binary(value):
r"""
Return True if the value appears to be binary (that is, it's a byte
string and isn't decodable).
>>> is_binary(b'\xff')
True
>>> is_binary('\xff')
False
"""
return isinstance(value, bytes) and not is_decodable(value)
def trim(s):
r"""
Trim something like a docstring to remove the whitespace that
is common due to indentation and formatting.
>>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
'foo = bar\n\tbar = baz'
"""
return textwrap.dedent(s).strip()
def wrap(s):
"""
Wrap lines of text, retaining existing newlines as
paragraph markers.
>>> print(wrap(lorem_ipsum))
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
minim veniam, quis nostrud exercitation ullamco laboris nisi ut
aliquip ex ea commodo consequat. Duis aute irure dolor in
reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.
<BLANKLINE>
Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam
varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus
magna felis sollicitudin mauris. Integer in mauris eu nibh euismod
gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis
risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue,
eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas
fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla
a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis,
neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing
sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque
nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus
quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis,
molestie eu, feugiat in, orci. In hac habitasse platea dictumst.
"""
paragraphs = s.splitlines()
wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs)
return '\n\n'.join(wrapped)
def unwrap(s):
r"""
Given a multi-line string, return an unwrapped version.
>>> wrapped = wrap(lorem_ipsum)
>>> wrapped.count('\n')
20
>>> unwrapped = unwrap(wrapped)
>>> unwrapped.count('\n')
1
>>> print(unwrapped)
Lorem ipsum dolor sit amet, consectetur adipiscing ...
Curabitur pretium tincidunt lacus. Nulla gravida orci ...
"""
paragraphs = re.split(r'\n\n+', s)
cleaned = (para.replace('\n', ' ') for para in paragraphs)
return '\n'.join(cleaned)
class Splitter(object):
"""object that will split a string with the given arguments for each call
>>> s = Splitter(',')
>>> s('hello, world, this is your, master calling')
['hello', ' world', ' this is your', ' master calling']
"""
def __init__(self, *args):
self.args = args
def __call__(self, s):
return s.split(*self.args)
def indent(string, prefix=' ' * 4):
"""
>>> indent('foo')
' foo'
"""
return prefix + string
class WordSet(tuple):
"""
Given an identifier, return the words that identifier represents,
whether in camel case, underscore-separated, etc.
>>> WordSet.parse("camelCase")
('camel', 'Case')
>>> WordSet.parse("under_sep")
('under', 'sep')
Acronyms should be retained
>>> WordSet.parse("firstSNL")
('first', 'SNL')
>>> WordSet.parse("you_and_I")
('you', 'and', 'I')
>>> WordSet.parse("A simple test")
('A', 'simple', 'test')
Multiple caps should not interfere with the first cap of another word.
>>> WordSet.parse("myABCClass")
('my', 'ABC', 'Class')
The result is a WordSet, so you can get the form you need.
>>> WordSet.parse("myABCClass").underscore_separated()
'my_ABC_Class'
>>> WordSet.parse('a-command').camel_case()
'ACommand'
>>> WordSet.parse('someIdentifier').lowered().space_separated()
'some identifier'
Slices of the result should return another WordSet.
>>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
'out_of_context'
>>> WordSet.from_class_name(WordSet()).lowered().space_separated()
'word set'
>>> example = WordSet.parse('figured it out')
>>> example.headless_camel_case()
'figuredItOut'
>>> example.dash_separated()
'figured-it-out'
"""
_pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')
def capitalized(self):
return WordSet(word.capitalize() for word in self)
def lowered(self):
return WordSet(word.lower() for word in self)
def camel_case(self):
return ''.join(self.capitalized())
def headless_camel_case(self):
words = iter(self)
first = next(words).lower()
new_words = itertools.chain((first,), WordSet(words).camel_case())
return ''.join(new_words)
def underscore_separated(self):
return '_'.join(self)
def dash_separated(self):
return '-'.join(self)
def space_separated(self):
return ' '.join(self)
def trim_right(self, item):
"""
Remove the item from the end of the set.
>>> WordSet.parse('foo bar').trim_right('foo')
('foo', 'bar')
>>> WordSet.parse('foo bar').trim_right('bar')
('foo',)
>>> WordSet.parse('').trim_right('bar')
()
"""
return self[:-1] if self and self[-1] == item else self
def trim_left(self, item):
"""
Remove the item from the beginning of the set.
>>> WordSet.parse('foo bar').trim_left('foo')
('bar',)
>>> WordSet.parse('foo bar').trim_left('bar')
('foo', 'bar')
>>> WordSet.parse('').trim_left('bar')
()
"""
return self[1:] if self and self[0] == item else self
def trim(self, item):
"""
>>> WordSet.parse('foo bar').trim('foo')
('bar',)
"""
return self.trim_left(item).trim_right(item)
def __getitem__(self, item):
result = super(WordSet, self).__getitem__(item)
if isinstance(item, slice):
result = WordSet(result)
return result
@classmethod
def parse(cls, identifier):
matches = cls._pattern.finditer(identifier)
return WordSet(match.group(0) for match in matches)
@classmethod
def from_class_name(cls, subject):
return cls.parse(subject.__class__.__name__)
# for backward compatibility
words = WordSet.parse
def simple_html_strip(s):
r"""
Remove HTML from the string `s`.
>>> str(simple_html_strip(''))
''
>>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
A stormy day in paradise
>>> print(simple_html_strip('Somebody <!-- do not --> tell the truth.'))
Somebody tell the truth.
>>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
What about
multiple lines?
"""
html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
texts = (match.group(3) or '' for match in html_stripper.finditer(s))
return ''.join(texts)
class SeparatedValues(str):
"""
A string separated by a separator. Overrides __iter__ for getting
the values.
>>> list(SeparatedValues('a,b,c'))
['a', 'b', 'c']
Whitespace is stripped and empty values are discarded.
>>> list(SeparatedValues(' a, b , c, '))
['a', 'b', 'c']
"""
separator = ','
def __iter__(self):
parts = self.split(self.separator)
return filter(None, (part.strip() for part in parts))
class Stripper:
r"""
Given a series of lines, find the common prefix and strip it from them.
>>> lines = [
... 'abcdefg\n',
... 'abc\n',
... 'abcde\n',
... ]
>>> res = Stripper.strip_prefix(lines)
>>> res.prefix
'abc'
>>> list(res.lines)
['defg\n', '\n', 'de\n']
If no prefix is common, nothing should be stripped.
>>> lines = [
... 'abcd\n',
... '1234\n',
... ]
>>> res = Stripper.strip_prefix(lines)
>>> res.prefix = ''
>>> list(res.lines)
['abcd\n', '1234\n']
"""
def __init__(self, prefix, lines):
self.prefix = prefix
self.lines = map(self, lines)
@classmethod
def strip_prefix(cls, lines):
prefix_lines, lines = itertools.tee(lines)
prefix = functools.reduce(cls.common_prefix, prefix_lines)
return cls(prefix, lines)
def __call__(self, line):
if not self.prefix:
return line
null, prefix, rest = line.partition(self.prefix)
return rest
@staticmethod
def common_prefix(s1, s2):
"""
Return the common prefix of two lines.
"""
index = min(len(s1), len(s2))
while s1[:index] != s2[:index]:
index -= 1
return s1[:index]
def remove_prefix(text, prefix):
"""
Remove the prefix from the text if it exists.
>>> remove_prefix('underwhelming performance', 'underwhelming ')
'performance'
>>> remove_prefix('something special', 'sample')
'something special'
"""
null, prefix, rest = text.rpartition(prefix)
return rest
def remove_suffix(text, suffix):
"""
Remove the suffix from the text if it exists.
>>> remove_suffix('name.git', '.git')
'name'
>>> remove_suffix('something special', 'sample')
'something special'
"""
rest, suffix, null = text.partition(suffix)
return rest
def normalize_newlines(text):
r"""
Replace alternate newlines with the canonical newline.
>>> normalize_newlines('Lorem Ipsum\u2029')
'Lorem Ipsum\n'
>>> normalize_newlines('Lorem Ipsum\r\n')
'Lorem Ipsum\n'
>>> normalize_newlines('Lorem Ipsum\x85')
'Lorem Ipsum\n'
"""
newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
pattern = '|'.join(newlines)
return re.sub(pattern, '\n', text)
def _nonblank(str):
return str and not str.startswith('#')
@functools.singledispatch
def yield_lines(iterable):
r"""
Yield valid lines of a string or iterable.
>>> list(yield_lines(''))
[]
>>> list(yield_lines(['foo', 'bar']))
['foo', 'bar']
>>> list(yield_lines('foo\nbar'))
['foo', 'bar']
>>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
['foo', 'baz #comment']
>>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
['foo', 'bar', 'baz', 'bing']
"""
return itertools.chain.from_iterable(map(yield_lines, iterable))
@yield_lines.register(str)
def _(text):
return filter(_nonblank, map(str.strip, text.splitlines()))
def drop_comment(line):
"""
Drop comments.
>>> drop_comment('foo # bar')
'foo'
A hash without a space may be in a URL.
>>> drop_comment('http://example.com/foo#bar')
'http://example.com/foo#bar'
"""
return line.partition(' #')[0]
def join_continuation(lines):
r"""
Join lines continued by a trailing backslash.
>>> list(join_continuation(['foo \\', 'bar', 'baz']))
['foobar', 'baz']
>>> list(join_continuation(['foo \\', 'bar', 'baz']))
['foobar', 'baz']
>>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
['foobarbaz']
Not sure why, but...
The character preceeding the backslash is also elided.
>>> list(join_continuation(['goo\\', 'dly']))
['godly']
A terrible idea, but...
If no line is available to continue, suppress the lines.
>>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
['foo']
"""
lines = iter(lines)
for item in lines:
while item.endswith('\\'):
try:
item = item[:-2].strip() + next(lines)
except StopIteration:
return
yield item

View file

@ -0,0 +1,4 @@
from .more import * # noqa
from .recipes import * # noqa
__version__ = '8.12.0'

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,698 @@
"""Imported from the recipes section of the itertools documentation.
All functions taken from the recipes section of the itertools library docs
[1]_.
Some backward-compatible usability improvements have been made.
.. [1] http://docs.python.org/library/itertools.html#recipes
"""
import warnings
from collections import deque
from itertools import (
chain,
combinations,
count,
cycle,
groupby,
islice,
repeat,
starmap,
tee,
zip_longest,
)
import operator
from random import randrange, sample, choice
__all__ = [
'all_equal',
'before_and_after',
'consume',
'convolve',
'dotproduct',
'first_true',
'flatten',
'grouper',
'iter_except',
'ncycles',
'nth',
'nth_combination',
'padnone',
'pad_none',
'pairwise',
'partition',
'powerset',
'prepend',
'quantify',
'random_combination_with_replacement',
'random_combination',
'random_permutation',
'random_product',
'repeatfunc',
'roundrobin',
'sliding_window',
'tabulate',
'tail',
'take',
'triplewise',
'unique_everseen',
'unique_justseen',
]
def take(n, iterable):
"""Return first *n* items of the iterable as a list.
>>> take(3, range(10))
[0, 1, 2]
If there are fewer than *n* items in the iterable, all of them are
returned.
>>> take(10, range(3))
[0, 1, 2]
"""
return list(islice(iterable, n))
def tabulate(function, start=0):
"""Return an iterator over the results of ``func(start)``,
``func(start + 1)``, ``func(start + 2)``...
*func* should be a function that accepts one integer argument.
If *start* is not specified it defaults to 0. It will be incremented each
time the iterator is advanced.
>>> square = lambda x: x ** 2
>>> iterator = tabulate(square, -3)
>>> take(4, iterator)
[9, 4, 1, 0]
"""
return map(function, count(start))
def tail(n, iterable):
"""Return an iterator over the last *n* items of *iterable*.
>>> t = tail(3, 'ABCDEFG')
>>> list(t)
['E', 'F', 'G']
"""
return iter(deque(iterable, maxlen=n))
def consume(iterator, n=None):
"""Advance *iterable* by *n* steps. If *n* is ``None``, consume it
entirely.
Efficiently exhausts an iterator without returning values. Defaults to
consuming the whole iterator, but an optional second argument may be
provided to limit consumption.
>>> i = (x for x in range(10))
>>> next(i)
0
>>> consume(i, 3)
>>> next(i)
4
>>> consume(i)
>>> next(i)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration
If the iterator has fewer items remaining than the provided limit, the
whole iterator will be consumed.
>>> i = (x for x in range(3))
>>> consume(i, 5)
>>> next(i)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration
"""
# Use functions that consume iterators at C speed.
if n is None:
# feed the entire iterator into a zero-length deque
deque(iterator, maxlen=0)
else:
# advance to the empty slice starting at position n
next(islice(iterator, n, n), None)
def nth(iterable, n, default=None):
"""Returns the nth item or a default value.
>>> l = range(10)
>>> nth(l, 3)
3
>>> nth(l, 20, "zebra")
'zebra'
"""
return next(islice(iterable, n, None), default)
def all_equal(iterable):
"""
Returns ``True`` if all the elements are equal to each other.
>>> all_equal('aaaa')
True
>>> all_equal('aaab')
False
"""
g = groupby(iterable)
return next(g, True) and not next(g, False)
def quantify(iterable, pred=bool):
"""Return the how many times the predicate is true.
>>> quantify([True, False, True])
2
"""
return sum(map(pred, iterable))
def pad_none(iterable):
"""Returns the sequence of elements and then returns ``None`` indefinitely.
>>> take(5, pad_none(range(3)))
[0, 1, 2, None, None]
Useful for emulating the behavior of the built-in :func:`map` function.
See also :func:`padded`.
"""
return chain(iterable, repeat(None))
padnone = pad_none
def ncycles(iterable, n):
"""Returns the sequence elements *n* times
>>> list(ncycles(["a", "b"], 3))
['a', 'b', 'a', 'b', 'a', 'b']
"""
return chain.from_iterable(repeat(tuple(iterable), n))
def dotproduct(vec1, vec2):
"""Returns the dot product of the two iterables.
>>> dotproduct([10, 10], [20, 20])
400
"""
return sum(map(operator.mul, vec1, vec2))
def flatten(listOfLists):
"""Return an iterator flattening one level of nesting in a list of lists.
>>> list(flatten([[0, 1], [2, 3]]))
[0, 1, 2, 3]
See also :func:`collapse`, which can flatten multiple levels of nesting.
"""
return chain.from_iterable(listOfLists)
def repeatfunc(func, times=None, *args):
"""Call *func* with *args* repeatedly, returning an iterable over the
results.
If *times* is specified, the iterable will terminate after that many
repetitions:
>>> from operator import add
>>> times = 4
>>> args = 3, 5
>>> list(repeatfunc(add, times, *args))
[8, 8, 8, 8]
If *times* is ``None`` the iterable will not terminate:
>>> from random import randrange
>>> times = None
>>> args = 1, 11
>>> take(6, repeatfunc(randrange, times, *args)) # doctest:+SKIP
[2, 4, 8, 1, 8, 4]
"""
if times is None:
return starmap(func, repeat(args))
return starmap(func, repeat(args, times))
def _pairwise(iterable):
"""Returns an iterator of paired items, overlapping, from the original
>>> take(4, pairwise(count()))
[(0, 1), (1, 2), (2, 3), (3, 4)]
On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`.
"""
a, b = tee(iterable)
next(b, None)
yield from zip(a, b)
try:
from itertools import pairwise as itertools_pairwise
except ImportError:
pairwise = _pairwise
else:
def pairwise(iterable):
yield from itertools_pairwise(iterable)
pairwise.__doc__ = _pairwise.__doc__
def grouper(iterable, n, fillvalue=None):
"""Collect data into fixed-length chunks or blocks.
>>> list(grouper('ABCDEFG', 3, 'x'))
[('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')]
"""
if isinstance(iterable, int):
warnings.warn(
"grouper expects iterable as first parameter", DeprecationWarning
)
n, iterable = iterable, n
args = [iter(iterable)] * n
return zip_longest(fillvalue=fillvalue, *args)
def roundrobin(*iterables):
"""Yields an item from each iterable, alternating between them.
>>> list(roundrobin('ABC', 'D', 'EF'))
['A', 'D', 'E', 'B', 'F', 'C']
This function produces the same output as :func:`interleave_longest`, but
may perform better for some inputs (in particular when the number of
iterables is small).
"""
# Recipe credited to George Sakkis
pending = len(iterables)
nexts = cycle(iter(it).__next__ for it in iterables)
while pending:
try:
for next in nexts:
yield next()
except StopIteration:
pending -= 1
nexts = cycle(islice(nexts, pending))
def partition(pred, iterable):
"""
Returns a 2-tuple of iterables derived from the input iterable.
The first yields the items that have ``pred(item) == False``.
The second yields the items that have ``pred(item) == True``.
>>> is_odd = lambda x: x % 2 != 0
>>> iterable = range(10)
>>> even_items, odd_items = partition(is_odd, iterable)
>>> list(even_items), list(odd_items)
([0, 2, 4, 6, 8], [1, 3, 5, 7, 9])
If *pred* is None, :func:`bool` is used.
>>> iterable = [0, 1, False, True, '', ' ']
>>> false_items, true_items = partition(None, iterable)
>>> list(false_items), list(true_items)
([0, False, ''], [1, True, ' '])
"""
if pred is None:
pred = bool
evaluations = ((pred(x), x) for x in iterable)
t1, t2 = tee(evaluations)
return (
(x for (cond, x) in t1 if not cond),
(x for (cond, x) in t2 if cond),
)
def powerset(iterable):
"""Yields all possible subsets of the iterable.
>>> list(powerset([1, 2, 3]))
[(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]
:func:`powerset` will operate on iterables that aren't :class:`set`
instances, so repeated elements in the input will produce repeated elements
in the output. Use :func:`unique_everseen` on the input to avoid generating
duplicates:
>>> seq = [1, 1, 0]
>>> list(powerset(seq))
[(), (1,), (1,), (0,), (1, 1), (1, 0), (1, 0), (1, 1, 0)]
>>> from more_itertools import unique_everseen
>>> list(powerset(unique_everseen(seq)))
[(), (1,), (0,), (1, 0)]
"""
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
def unique_everseen(iterable, key=None):
"""
Yield unique elements, preserving order.
>>> list(unique_everseen('AAAABBBCCDAABBB'))
['A', 'B', 'C', 'D']
>>> list(unique_everseen('ABBCcAD', str.lower))
['A', 'B', 'C', 'D']
Sequences with a mix of hashable and unhashable items can be used.
The function will be slower (i.e., `O(n^2)`) for unhashable items.
Remember that ``list`` objects are unhashable - you can use the *key*
parameter to transform the list to a tuple (which is hashable) to
avoid a slowdown.
>>> iterable = ([1, 2], [2, 3], [1, 2])
>>> list(unique_everseen(iterable)) # Slow
[[1, 2], [2, 3]]
>>> list(unique_everseen(iterable, key=tuple)) # Faster
[[1, 2], [2, 3]]
Similary, you may want to convert unhashable ``set`` objects with
``key=frozenset``. For ``dict`` objects,
``key=lambda x: frozenset(x.items())`` can be used.
"""
seenset = set()
seenset_add = seenset.add
seenlist = []
seenlist_add = seenlist.append
use_key = key is not None
for element in iterable:
k = key(element) if use_key else element
try:
if k not in seenset:
seenset_add(k)
yield element
except TypeError:
if k not in seenlist:
seenlist_add(k)
yield element
def unique_justseen(iterable, key=None):
"""Yields elements in order, ignoring serial duplicates
>>> list(unique_justseen('AAAABBBCCDAABBB'))
['A', 'B', 'C', 'D', 'A', 'B']
>>> list(unique_justseen('ABBCcAD', str.lower))
['A', 'B', 'C', 'A', 'D']
"""
return map(next, map(operator.itemgetter(1), groupby(iterable, key)))
def iter_except(func, exception, first=None):
"""Yields results from a function repeatedly until an exception is raised.
Converts a call-until-exception interface to an iterator interface.
Like ``iter(func, sentinel)``, but uses an exception instead of a sentinel
to end the loop.
>>> l = [0, 1, 2]
>>> list(iter_except(l.pop, IndexError))
[2, 1, 0]
Multiple exceptions can be specified as a stopping condition:
>>> l = [1, 2, 3, '...', 4, 5, 6]
>>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError)))
[7, 6, 5]
>>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError)))
[4, 3, 2]
>>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError)))
[]
"""
try:
if first is not None:
yield first()
while 1:
yield func()
except exception:
pass
def first_true(iterable, default=None, pred=None):
"""
Returns the first true value in the iterable.
If no true value is found, returns *default*
If *pred* is not None, returns the first item for which
``pred(item) == True`` .
>>> first_true(range(10))
1
>>> first_true(range(10), pred=lambda x: x > 5)
6
>>> first_true(range(10), default='missing', pred=lambda x: x > 9)
'missing'
"""
return next(filter(pred, iterable), default)
def random_product(*args, repeat=1):
"""Draw an item at random from each of the input iterables.
>>> random_product('abc', range(4), 'XYZ') # doctest:+SKIP
('c', 3, 'Z')
If *repeat* is provided as a keyword argument, that many items will be
drawn from each iterable.
>>> random_product('abcd', range(4), repeat=2) # doctest:+SKIP
('a', 2, 'd', 3)
This equivalent to taking a random selection from
``itertools.product(*args, **kwarg)``.
"""
pools = [tuple(pool) for pool in args] * repeat
return tuple(choice(pool) for pool in pools)
def random_permutation(iterable, r=None):
"""Return a random *r* length permutation of the elements in *iterable*.
If *r* is not specified or is ``None``, then *r* defaults to the length of
*iterable*.
>>> random_permutation(range(5)) # doctest:+SKIP
(3, 4, 0, 1, 2)
This equivalent to taking a random selection from
``itertools.permutations(iterable, r)``.
"""
pool = tuple(iterable)
r = len(pool) if r is None else r
return tuple(sample(pool, r))
def random_combination(iterable, r):
"""Return a random *r* length subsequence of the elements in *iterable*.
>>> random_combination(range(5), 3) # doctest:+SKIP
(2, 3, 4)
This equivalent to taking a random selection from
``itertools.combinations(iterable, r)``.
"""
pool = tuple(iterable)
n = len(pool)
indices = sorted(sample(range(n), r))
return tuple(pool[i] for i in indices)
def random_combination_with_replacement(iterable, r):
"""Return a random *r* length subsequence of elements in *iterable*,
allowing individual elements to be repeated.
>>> random_combination_with_replacement(range(3), 5) # doctest:+SKIP
(0, 0, 1, 2, 2)
This equivalent to taking a random selection from
``itertools.combinations_with_replacement(iterable, r)``.
"""
pool = tuple(iterable)
n = len(pool)
indices = sorted(randrange(n) for i in range(r))
return tuple(pool[i] for i in indices)
def nth_combination(iterable, r, index):
"""Equivalent to ``list(combinations(iterable, r))[index]``.
The subsequences of *iterable* that are of length *r* can be ordered
lexicographically. :func:`nth_combination` computes the subsequence at
sort position *index* directly, without computing the previous
subsequences.
>>> nth_combination(range(5), 3, 5)
(0, 3, 4)
``ValueError`` will be raised If *r* is negative or greater than the length
of *iterable*.
``IndexError`` will be raised if the given *index* is invalid.
"""
pool = tuple(iterable)
n = len(pool)
if (r < 0) or (r > n):
raise ValueError
c = 1
k = min(r, n - r)
for i in range(1, k + 1):
c = c * (n - k + i) // i
if index < 0:
index += c
if (index < 0) or (index >= c):
raise IndexError
result = []
while r:
c, n, r = c * r // n, n - 1, r - 1
while index >= c:
index -= c
c, n = c * (n - r) // n, n - 1
result.append(pool[-1 - n])
return tuple(result)
def prepend(value, iterator):
"""Yield *value*, followed by the elements in *iterator*.
>>> value = '0'
>>> iterator = ['1', '2', '3']
>>> list(prepend(value, iterator))
['0', '1', '2', '3']
To prepend multiple values, see :func:`itertools.chain`
or :func:`value_chain`.
"""
return chain([value], iterator)
def convolve(signal, kernel):
"""Convolve the iterable *signal* with the iterable *kernel*.
>>> signal = (1, 2, 3, 4, 5)
>>> kernel = [3, 2, 1]
>>> list(convolve(signal, kernel))
[3, 8, 14, 20, 26, 14, 5]
Note: the input arguments are not interchangeable, as the *kernel*
is immediately consumed and stored.
"""
kernel = tuple(kernel)[::-1]
n = len(kernel)
window = deque([0], maxlen=n) * n
for x in chain(signal, repeat(0, n - 1)):
window.append(x)
yield sum(map(operator.mul, kernel, window))
def before_and_after(predicate, it):
"""A variant of :func:`takewhile` that allows complete access to the
remainder of the iterator.
>>> it = iter('ABCdEfGhI')
>>> all_upper, remainder = before_and_after(str.isupper, it)
>>> ''.join(all_upper)
'ABC'
>>> ''.join(remainder) # takewhile() would lose the 'd'
'dEfGhI'
Note that the first iterator must be fully consumed before the second
iterator can generate valid results.
"""
it = iter(it)
transition = []
def true_iterator():
for elem in it:
if predicate(elem):
yield elem
else:
transition.append(elem)
return
def remainder_iterator():
yield from transition
yield from it
return true_iterator(), remainder_iterator()
def triplewise(iterable):
"""Return overlapping triplets from *iterable*.
>>> list(triplewise('ABCDE'))
[('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E')]
"""
for (a, _), (b, c) in pairwise(pairwise(iterable)):
yield a, b, c
def sliding_window(iterable, n):
"""Return a sliding window of width *n* over *iterable*.
>>> list(sliding_window(range(6), 4))
[(0, 1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5)]
If *iterable* has fewer than *n* items, then nothing is yielded:
>>> list(sliding_window(range(3), 4))
[]
For a variant with more features, see :func:`windowed`.
"""
it = iter(iterable)
window = deque(islice(it, n), maxlen=n)
if len(window) == n:
yield tuple(window)
for x in it:
window.append(x)
yield tuple(window)

View file

@ -0,0 +1,26 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
__all__ = [
"__title__",
"__summary__",
"__uri__",
"__version__",
"__author__",
"__email__",
"__license__",
"__copyright__",
]
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "21.3"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = "2014-2019 %s" % __author__

View file

@ -0,0 +1,25 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from .__about__ import (
__author__,
__copyright__,
__email__,
__license__,
__summary__,
__title__,
__uri__,
__version__,
)
__all__ = [
"__title__",
"__summary__",
"__uri__",
"__version__",
"__author__",
"__email__",
"__license__",
"__copyright__",
]

View file

@ -0,0 +1,301 @@
import collections
import functools
import os
import re
import struct
import sys
import warnings
from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple
# Python does not provide platform information at sufficient granularity to
# identify the architecture of the running executable in some cases, so we
# determine it dynamically by reading the information from the running
# process. This only applies on Linux, which uses the ELF format.
class _ELFFileHeader:
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
class _InvalidELFFileHeader(ValueError):
"""
An invalid ELF file header was found.
"""
ELF_MAGIC_NUMBER = 0x7F454C46
ELFCLASS32 = 1
ELFCLASS64 = 2
ELFDATA2LSB = 1
ELFDATA2MSB = 2
EM_386 = 3
EM_S390 = 22
EM_ARM = 40
EM_X86_64 = 62
EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
def __init__(self, file: IO[bytes]) -> None:
def unpack(fmt: str) -> int:
try:
data = file.read(struct.calcsize(fmt))
result: Tuple[int, ...] = struct.unpack(fmt, data)
except struct.error:
raise _ELFFileHeader._InvalidELFFileHeader()
return result[0]
self.e_ident_magic = unpack(">I")
if self.e_ident_magic != self.ELF_MAGIC_NUMBER:
raise _ELFFileHeader._InvalidELFFileHeader()
self.e_ident_class = unpack("B")
if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}:
raise _ELFFileHeader._InvalidELFFileHeader()
self.e_ident_data = unpack("B")
if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}:
raise _ELFFileHeader._InvalidELFFileHeader()
self.e_ident_version = unpack("B")
self.e_ident_osabi = unpack("B")
self.e_ident_abiversion = unpack("B")
self.e_ident_pad = file.read(7)
format_h = "<H" if self.e_ident_data == self.ELFDATA2LSB else ">H"
format_i = "<I" if self.e_ident_data == self.ELFDATA2LSB else ">I"
format_q = "<Q" if self.e_ident_data == self.ELFDATA2LSB else ">Q"
format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q
self.e_type = unpack(format_h)
self.e_machine = unpack(format_h)
self.e_version = unpack(format_i)
self.e_entry = unpack(format_p)
self.e_phoff = unpack(format_p)
self.e_shoff = unpack(format_p)
self.e_flags = unpack(format_i)
self.e_ehsize = unpack(format_h)
self.e_phentsize = unpack(format_h)
self.e_phnum = unpack(format_h)
self.e_shentsize = unpack(format_h)
self.e_shnum = unpack(format_h)
self.e_shstrndx = unpack(format_h)
def _get_elf_header() -> Optional[_ELFFileHeader]:
try:
with open(sys.executable, "rb") as f:
elf_header = _ELFFileHeader(f)
except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
return None
return elf_header
def _is_linux_armhf() -> bool:
# hard-float ABI can be detected from the ELF header of the running
# process
# https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
elf_header = _get_elf_header()
if elf_header is None:
return False
result = elf_header.e_ident_class == elf_header.ELFCLASS32
result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
result &= elf_header.e_machine == elf_header.EM_ARM
result &= (
elf_header.e_flags & elf_header.EF_ARM_ABIMASK
) == elf_header.EF_ARM_ABI_VER5
result &= (
elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD
) == elf_header.EF_ARM_ABI_FLOAT_HARD
return result
def _is_linux_i686() -> bool:
elf_header = _get_elf_header()
if elf_header is None:
return False
result = elf_header.e_ident_class == elf_header.ELFCLASS32
result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
result &= elf_header.e_machine == elf_header.EM_386
return result
def _have_compatible_abi(arch: str) -> bool:
if arch == "armv7l":
return _is_linux_armhf()
if arch == "i686":
return _is_linux_i686()
return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}
# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
class _GLibCVersion(NamedTuple):
major: int
minor: int
def _glibc_version_string_confstr() -> Optional[str]:
"""
Primary implementation of glibc_version_string using os.confstr.
"""
# os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
# to be broken or missing. This strategy is used in the standard library
# platform module.
# https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
try:
# os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
version_string = os.confstr("CS_GNU_LIBC_VERSION")
assert version_string is not None
_, version = version_string.split()
except (AssertionError, AttributeError, OSError, ValueError):
# os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
return None
return version
def _glibc_version_string_ctypes() -> Optional[str]:
"""
Fallback implementation of glibc_version_string using ctypes.
"""
try:
import ctypes
except ImportError:
return None
# ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
# manpage says, "If filename is NULL, then the returned handle is for the
# main program". This way we can let the linker do the work to figure out
# which libc our process is actually using.
#
# We must also handle the special case where the executable is not a
# dynamically linked executable. This can occur when using musl libc,
# for example. In this situation, dlopen() will error, leading to an
# OSError. Interestingly, at least in the case of musl, there is no
# errno set on the OSError. The single string argument used to construct
# OSError comes from libc itself and is therefore not portable to
# hard code here. In any case, failure to call dlopen() means we
# can proceed, so we bail on our attempt.
try:
process_namespace = ctypes.CDLL(None)
except OSError:
return None
try:
gnu_get_libc_version = process_namespace.gnu_get_libc_version
except AttributeError:
# Symbol doesn't exist -> therefore, we are not linked to
# glibc.
return None
# Call gnu_get_libc_version, which returns a string like "2.5"
gnu_get_libc_version.restype = ctypes.c_char_p
version_str: str = gnu_get_libc_version()
# py2 / py3 compatibility:
if not isinstance(version_str, str):
version_str = version_str.decode("ascii")
return version_str
def _glibc_version_string() -> Optional[str]:
"""Returns glibc version string, or None if not using glibc."""
return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
"""Parse glibc version.
We use a regexp instead of str.split because we want to discard any
random junk that might come after the minor version -- this might happen
in patched/forked versions of glibc (e.g. Linaro's version of glibc
uses version strings like "2.20-2014.11"). See gh-3588.
"""
m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
if not m:
warnings.warn(
"Expected glibc version with 2 components major.minor,"
" got: %s" % version_str,
RuntimeWarning,
)
return -1, -1
return int(m.group("major")), int(m.group("minor"))
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
version_str = _glibc_version_string()
if version_str is None:
return (-1, -1)
return _parse_glibc_version(version_str)
# From PEP 513, PEP 600
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
sys_glibc = _get_glibc_version()
if sys_glibc < version:
return False
# Check for presence of _manylinux module.
try:
import _manylinux # noqa
except ImportError:
return True
if hasattr(_manylinux, "manylinux_compatible"):
result = _manylinux.manylinux_compatible(version[0], version[1], arch)
if result is not None:
return bool(result)
return True
if version == _GLibCVersion(2, 5):
if hasattr(_manylinux, "manylinux1_compatible"):
return bool(_manylinux.manylinux1_compatible)
if version == _GLibCVersion(2, 12):
if hasattr(_manylinux, "manylinux2010_compatible"):
return bool(_manylinux.manylinux2010_compatible)
if version == _GLibCVersion(2, 17):
if hasattr(_manylinux, "manylinux2014_compatible"):
return bool(_manylinux.manylinux2014_compatible)
return True
_LEGACY_MANYLINUX_MAP = {
# CentOS 7 w/ glibc 2.17 (PEP 599)
(2, 17): "manylinux2014",
# CentOS 6 w/ glibc 2.12 (PEP 571)
(2, 12): "manylinux2010",
# CentOS 5 w/ glibc 2.5 (PEP 513)
(2, 5): "manylinux1",
}
def platform_tags(linux: str, arch: str) -> Iterator[str]:
if not _have_compatible_abi(arch):
return
# Oldest glibc to be supported regardless of architecture is (2, 17).
too_old_glibc2 = _GLibCVersion(2, 16)
if arch in {"x86_64", "i686"}:
# On x86/i686 also oldest glibc to be supported is (2, 5).
too_old_glibc2 = _GLibCVersion(2, 4)
current_glibc = _GLibCVersion(*_get_glibc_version())
glibc_max_list = [current_glibc]
# We can assume compatibility across glibc major versions.
# https://sourceware.org/bugzilla/show_bug.cgi?id=24636
#
# Build a list of maximum glibc versions so that we can
# output the canonical list of all glibc from current_glibc
# down to too_old_glibc2, including all intermediary versions.
for glibc_major in range(current_glibc.major - 1, 1, -1):
glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
for glibc_max in glibc_max_list:
if glibc_max.major == too_old_glibc2.major:
min_minor = too_old_glibc2.minor
else:
# For other glibc major versions oldest supported is (x, 0).
min_minor = -1
for glibc_minor in range(glibc_max.minor, min_minor, -1):
glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
tag = "manylinux_{}_{}".format(*glibc_version)
if _is_compatible(tag, arch, glibc_version):
yield linux.replace("linux", tag)
# Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
if glibc_version in _LEGACY_MANYLINUX_MAP:
legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
if _is_compatible(legacy_tag, arch, glibc_version):
yield linux.replace("linux", legacy_tag)

View file

@ -0,0 +1,136 @@
"""PEP 656 support.
This module implements logic to detect if the currently running Python is
linked against musl, and what musl version is used.
"""
import contextlib
import functools
import operator
import os
import re
import struct
import subprocess
import sys
from typing import IO, Iterator, NamedTuple, Optional, Tuple
def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]:
return struct.unpack(fmt, f.read(struct.calcsize(fmt)))
def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]:
"""Detect musl libc location by parsing the Python executable.
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
f.seek(0)
try:
ident = _read_unpacked(f, "16B")
except struct.error:
return None
if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF.
return None
f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version.
try:
# e_fmt: Format for program header.
# p_fmt: Format for section header.
# p_idx: Indexes to find p_type, p_offset, and p_filesz.
e_fmt, p_fmt, p_idx = {
1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit.
2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit.
}[ident[4]]
except KeyError:
return None
else:
p_get = operator.itemgetter(*p_idx)
# Find the interpreter section and return its content.
try:
_, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt)
except struct.error:
return None
for i in range(e_phnum + 1):
f.seek(e_phoff + e_phentsize * i)
try:
p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt))
except struct.error:
return None
if p_type != 3: # Not PT_INTERP.
continue
f.seek(p_offset)
interpreter = os.fsdecode(f.read(p_filesz)).strip("\0")
if "musl" not in interpreter:
return None
return interpreter
return None
class _MuslVersion(NamedTuple):
major: int
minor: int
def _parse_musl_version(output: str) -> Optional[_MuslVersion]:
lines = [n for n in (n.strip() for n in output.splitlines()) if n]
if len(lines) < 2 or lines[0][:4] != "musl":
return None
m = re.match(r"Version (\d+)\.(\d+)", lines[1])
if not m:
return None
return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
@functools.lru_cache()
def _get_musl_version(executable: str) -> Optional[_MuslVersion]:
"""Detect currently-running musl runtime version.
This is done by checking the specified executable's dynamic linking
information, and invoking the loader to parse its output for a version
string. If the loader is musl, the output would be something like::
musl libc (x86_64)
Version 1.2.2
Dynamic Program Loader
"""
with contextlib.ExitStack() as stack:
try:
f = stack.enter_context(open(executable, "rb"))
except OSError:
return None
ld = _parse_ld_musl_from_elf(f)
if not ld:
return None
proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True)
return _parse_musl_version(proc.stderr)
def platform_tags(arch: str) -> Iterator[str]:
"""Generate musllinux tags compatible to the current platform.
:param arch: Should be the part of platform tag after the ``linux_``
prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a
prerequisite for the current platform to be musllinux-compatible.
:returns: An iterator of compatible musllinux tags.
"""
sys_musl = _get_musl_version(sys.executable)
if sys_musl is None: # Python not dynamically linked against musl.
return
for minor in range(sys_musl.minor, -1, -1):
yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
if __name__ == "__main__": # pragma: no cover
import sysconfig
plat = sysconfig.get_platform()
assert plat.startswith("linux-"), "not linux"
print("plat:", plat)
print("musl:", _get_musl_version(sys.executable))
print("tags:", end=" ")
for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])):
print(t, end="\n ")

View file

@ -0,0 +1,61 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
class InfinityType:
def __repr__(self) -> str:
return "Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return False
def __le__(self, other: object) -> bool:
return False
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return True
def __ge__(self, other: object) -> bool:
return True
def __neg__(self: object) -> "NegativeInfinityType":
return NegativeInfinity
Infinity = InfinityType()
class NegativeInfinityType:
def __repr__(self) -> str:
return "-Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return True
def __le__(self, other: object) -> bool:
return True
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return False
def __ge__(self, other: object) -> bool:
return False
def __neg__(self: object) -> InfinityType:
return Infinity
NegativeInfinity = NegativeInfinityType()

View file

@ -0,0 +1,304 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import operator
import os
import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from pkg_resources.extern.pyparsing import ( # noqa: N817
Forward,
Group,
Literal as L,
ParseException,
ParseResults,
QuotedString,
ZeroOrMore,
stringEnd,
stringStart,
)
from .specifiers import InvalidSpecifier, Specifier
__all__ = [
"InvalidMarker",
"UndefinedComparison",
"UndefinedEnvironmentName",
"Marker",
"default_environment",
]
Operator = Callable[[str, str], bool]
class InvalidMarker(ValueError):
"""
An invalid marker was found, users should refer to PEP 508.
"""
class UndefinedComparison(ValueError):
"""
An invalid operation was attempted on a value that doesn't support it.
"""
class UndefinedEnvironmentName(ValueError):
"""
A name was attempted to be used that does not exist inside of the
environment.
"""
class Node:
def __init__(self, value: Any) -> None:
self.value = value
def __str__(self) -> str:
return str(self.value)
def __repr__(self) -> str:
return f"<{self.__class__.__name__}('{self}')>"
def serialize(self) -> str:
raise NotImplementedError
class Variable(Node):
def serialize(self) -> str:
return str(self)
class Value(Node):
def serialize(self) -> str:
return f'"{self}"'
class Op(Node):
def serialize(self) -> str:
return str(self)
VARIABLE = (
L("implementation_version")
| L("platform_python_implementation")
| L("implementation_name")
| L("python_full_version")
| L("platform_release")
| L("platform_version")
| L("platform_machine")
| L("platform_system")
| L("python_version")
| L("sys_platform")
| L("os_name")
| L("os.name") # PEP-345
| L("sys.platform") # PEP-345
| L("platform.version") # PEP-345
| L("platform.machine") # PEP-345
| L("platform.python_implementation") # PEP-345
| L("python_implementation") # undocumented setuptools legacy
| L("extra") # PEP-508
)
ALIASES = {
"os.name": "os_name",
"sys.platform": "sys_platform",
"platform.version": "platform_version",
"platform.machine": "platform_machine",
"platform.python_implementation": "platform_python_implementation",
"python_implementation": "platform_python_implementation",
}
VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0])))
VERSION_CMP = (
L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<")
)
MARKER_OP = VERSION_CMP | L("not in") | L("in")
MARKER_OP.setParseAction(lambda s, l, t: Op(t[0]))
MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))
BOOLOP = L("and") | L("or")
MARKER_VAR = VARIABLE | MARKER_VALUE
MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
MARKER = stringStart + MARKER_EXPR + stringEnd
def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]:
if isinstance(results, ParseResults):
return [_coerce_parse_result(i) for i in results]
else:
return results
def _format_marker(
marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True
) -> str:
assert isinstance(marker, (list, tuple, str))
# Sometimes we have a structure like [[...]] which is a single item list
# where the single item is itself it's own list. In that case we want skip
# the rest of this function so that we don't get extraneous () on the
# outside.
if (
isinstance(marker, list)
and len(marker) == 1
and isinstance(marker[0], (list, tuple))
):
return _format_marker(marker[0])
if isinstance(marker, list):
inner = (_format_marker(m, first=False) for m in marker)
if first:
return " ".join(inner)
else:
return "(" + " ".join(inner) + ")"
elif isinstance(marker, tuple):
return " ".join([m.serialize() for m in marker])
else:
return marker
_operators: Dict[str, Operator] = {
"in": lambda lhs, rhs: lhs in rhs,
"not in": lambda lhs, rhs: lhs not in rhs,
"<": operator.lt,
"<=": operator.le,
"==": operator.eq,
"!=": operator.ne,
">=": operator.ge,
">": operator.gt,
}
def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
try:
spec = Specifier("".join([op.serialize(), rhs]))
except InvalidSpecifier:
pass
else:
return spec.contains(lhs)
oper: Optional[Operator] = _operators.get(op.serialize())
if oper is None:
raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
return oper(lhs, rhs)
class Undefined:
pass
_undefined = Undefined()
def _get_env(environment: Dict[str, str], name: str) -> str:
value: Union[str, Undefined] = environment.get(name, _undefined)
if isinstance(value, Undefined):
raise UndefinedEnvironmentName(
f"{name!r} does not exist in evaluation environment."
)
return value
def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool:
groups: List[List[bool]] = [[]]
for marker in markers:
assert isinstance(marker, (list, tuple, str))
if isinstance(marker, list):
groups[-1].append(_evaluate_markers(marker, environment))
elif isinstance(marker, tuple):
lhs, op, rhs = marker
if isinstance(lhs, Variable):
lhs_value = _get_env(environment, lhs.value)
rhs_value = rhs.value
else:
lhs_value = lhs.value
rhs_value = _get_env(environment, rhs.value)
groups[-1].append(_eval_op(lhs_value, op, rhs_value))
else:
assert marker in ["and", "or"]
if marker == "or":
groups.append([])
return any(all(item) for item in groups)
def format_full_version(info: "sys._version_info") -> str:
version = "{0.major}.{0.minor}.{0.micro}".format(info)
kind = info.releaselevel
if kind != "final":
version += kind[0] + str(info.serial)
return version
def default_environment() -> Dict[str, str]:
iver = format_full_version(sys.implementation.version)
implementation_name = sys.implementation.name
return {
"implementation_name": implementation_name,
"implementation_version": iver,
"os_name": os.name,
"platform_machine": platform.machine(),
"platform_release": platform.release(),
"platform_system": platform.system(),
"platform_version": platform.version(),
"python_full_version": platform.python_version(),
"platform_python_implementation": platform.python_implementation(),
"python_version": ".".join(platform.python_version_tuple()[:2]),
"sys_platform": sys.platform,
}
class Marker:
def __init__(self, marker: str) -> None:
try:
self._markers = _coerce_parse_result(MARKER.parseString(marker))
except ParseException as e:
raise InvalidMarker(
f"Invalid marker: {marker!r}, parse error at "
f"{marker[e.loc : e.loc + 8]!r}"
)
def __str__(self) -> str:
return _format_marker(self._markers)
def __repr__(self) -> str:
return f"<Marker('{self}')>"
def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool:
"""Evaluate a marker.
Return the boolean from evaluating the given marker against the
environment. environment is an optional argument to override all or
part of the determined environment.
The environment is determined from the current Python process.
"""
current_environment = default_environment()
if environment is not None:
current_environment.update(environment)
return _evaluate_markers(self._markers, current_environment)

View file

@ -0,0 +1,146 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import re
import string
import urllib.parse
from typing import List, Optional as TOptional, Set
from pkg_resources.extern.pyparsing import ( # noqa
Combine,
Literal as L,
Optional,
ParseException,
Regex,
Word,
ZeroOrMore,
originalTextFor,
stringEnd,
stringStart,
)
from .markers import MARKER_EXPR, Marker
from .specifiers import LegacySpecifier, Specifier, SpecifierSet
class InvalidRequirement(ValueError):
"""
An invalid requirement was found, users should refer to PEP 508.
"""
ALPHANUM = Word(string.ascii_letters + string.digits)
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))
NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER
URI = Regex(r"[^ ]+")("url")
URL = AT + URI
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")
VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
VERSION_MANY = Combine(
VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False
)("_raw_spec")
_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "")
VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier")
VERSION_SPEC.setParseAction(lambda s, l, t: t[1])
MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker")
MARKER_EXPR.setParseAction(
lambda s, l, t: Marker(s[t._original_start : t._original_end])
)
MARKER_SEPARATOR = SEMICOLON
MARKER = MARKER_SEPARATOR + MARKER_EXPR
VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER)
URL_AND_MARKER = URL + Optional(MARKER)
NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER)
REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd
# pkg_resources.extern.pyparsing isn't thread safe during initialization, so we do it eagerly, see
# issue #104
REQUIREMENT.parseString("x[]")
class Requirement:
"""Parse a requirement.
Parse a given requirement string into its parts, such as name, specifier,
URL, and extras. Raises InvalidRequirement on a badly-formed requirement
string.
"""
# TODO: Can we test whether something is contained within a requirement?
# If so how do we do that? Do we need to test against the _name_ of
# the thing as well as the version? What about the markers?
# TODO: Can we normalize the name and extra name?
def __init__(self, requirement_string: str) -> None:
try:
req = REQUIREMENT.parseString(requirement_string)
except ParseException as e:
raise InvalidRequirement(
f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}'
)
self.name: str = req.name
if req.url:
parsed_url = urllib.parse.urlparse(req.url)
if parsed_url.scheme == "file":
if urllib.parse.urlunparse(parsed_url) != req.url:
raise InvalidRequirement("Invalid URL given")
elif not (parsed_url.scheme and parsed_url.netloc) or (
not parsed_url.scheme and not parsed_url.netloc
):
raise InvalidRequirement(f"Invalid URL: {req.url}")
self.url: TOptional[str] = req.url
else:
self.url = None
self.extras: Set[str] = set(req.extras.asList() if req.extras else [])
self.specifier: SpecifierSet = SpecifierSet(req.specifier)
self.marker: TOptional[Marker] = req.marker if req.marker else None
def __str__(self) -> str:
parts: List[str] = [self.name]
if self.extras:
formatted_extras = ",".join(sorted(self.extras))
parts.append(f"[{formatted_extras}]")
if self.specifier:
parts.append(str(self.specifier))
if self.url:
parts.append(f"@ {self.url}")
if self.marker:
parts.append(" ")
if self.marker:
parts.append(f"; {self.marker}")
return "".join(parts)
def __repr__(self) -> str:
return f"<Requirement('{self}')>"

View file

@ -0,0 +1,802 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import abc
import functools
import itertools
import re
import warnings
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Pattern,
Set,
Tuple,
TypeVar,
Union,
)
from .utils import canonicalize_version
from .version import LegacyVersion, Version, parse
ParsedVersion = Union[Version, LegacyVersion]
UnparsedVersion = Union[Version, LegacyVersion, str]
VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion)
CallableOperator = Callable[[ParsedVersion, str], bool]
class InvalidSpecifier(ValueError):
"""
An invalid specifier was found, users should refer to PEP 440.
"""
class BaseSpecifier(metaclass=abc.ABCMeta):
@abc.abstractmethod
def __str__(self) -> str:
"""
Returns the str representation of this Specifier like object. This
should be representative of the Specifier itself.
"""
@abc.abstractmethod
def __hash__(self) -> int:
"""
Returns a hash value for this Specifier like object.
"""
@abc.abstractmethod
def __eq__(self, other: object) -> bool:
"""
Returns a boolean representing whether or not the two Specifier like
objects are equal.
"""
@abc.abstractproperty
def prereleases(self) -> Optional[bool]:
"""
Returns whether or not pre-releases as a whole are allowed by this
specifier.
"""
@prereleases.setter
def prereleases(self, value: bool) -> None:
"""
Sets whether or not pre-releases as a whole are allowed by this
specifier.
"""
@abc.abstractmethod
def contains(self, item: str, prereleases: Optional[bool] = None) -> bool:
"""
Determines if the given item is contained within this specifier.
"""
@abc.abstractmethod
def filter(
self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None
) -> Iterable[VersionTypeVar]:
"""
Takes an iterable of items and filters them so that only items which
are contained within this specifier are allowed in it.
"""
class _IndividualSpecifier(BaseSpecifier):
_operators: Dict[str, str] = {}
_regex: Pattern[str]
def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None:
match = self._regex.search(spec)
if not match:
raise InvalidSpecifier(f"Invalid specifier: '{spec}'")
self._spec: Tuple[str, str] = (
match.group("operator").strip(),
match.group("version").strip(),
)
# Store whether or not this Specifier should accept prereleases
self._prereleases = prereleases
def __repr__(self) -> str:
pre = (
f", prereleases={self.prereleases!r}"
if self._prereleases is not None
else ""
)
return f"<{self.__class__.__name__}({str(self)!r}{pre})>"
def __str__(self) -> str:
return "{}{}".format(*self._spec)
@property
def _canonical_spec(self) -> Tuple[str, str]:
return self._spec[0], canonicalize_version(self._spec[1])
def __hash__(self) -> int:
return hash(self._canonical_spec)
def __eq__(self, other: object) -> bool:
if isinstance(other, str):
try:
other = self.__class__(str(other))
except InvalidSpecifier:
return NotImplemented
elif not isinstance(other, self.__class__):
return NotImplemented
return self._canonical_spec == other._canonical_spec
def _get_operator(self, op: str) -> CallableOperator:
operator_callable: CallableOperator = getattr(
self, f"_compare_{self._operators[op]}"
)
return operator_callable
def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion:
if not isinstance(version, (LegacyVersion, Version)):
version = parse(version)
return version
@property
def operator(self) -> str:
return self._spec[0]
@property
def version(self) -> str:
return self._spec[1]
@property
def prereleases(self) -> Optional[bool]:
return self._prereleases
@prereleases.setter
def prereleases(self, value: bool) -> None:
self._prereleases = value
def __contains__(self, item: str) -> bool:
return self.contains(item)
def contains(
self, item: UnparsedVersion, prereleases: Optional[bool] = None
) -> bool:
# Determine if prereleases are to be allowed or not.
if prereleases is None:
prereleases = self.prereleases
# Normalize item to a Version or LegacyVersion, this allows us to have
# a shortcut for ``"2.0" in Specifier(">=2")
normalized_item = self._coerce_version(item)
# Determine if we should be supporting prereleases in this specifier
# or not, if we do not support prereleases than we can short circuit
# logic if this version is a prereleases.
if normalized_item.is_prerelease and not prereleases:
return False
# Actually do the comparison to determine if this item is contained
# within this Specifier or not.
operator_callable: CallableOperator = self._get_operator(self.operator)
return operator_callable(normalized_item, self.version)
def filter(
self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None
) -> Iterable[VersionTypeVar]:
yielded = False
found_prereleases = []
kw = {"prereleases": prereleases if prereleases is not None else True}
# Attempt to iterate over all the values in the iterable and if any of
# them match, yield them.
for version in iterable:
parsed_version = self._coerce_version(version)
if self.contains(parsed_version, **kw):
# If our version is a prerelease, and we were not set to allow
# prereleases, then we'll store it for later in case nothing
# else matches this specifier.
if parsed_version.is_prerelease and not (
prereleases or self.prereleases
):
found_prereleases.append(version)
# Either this is not a prerelease, or we should have been
# accepting prereleases from the beginning.
else:
yielded = True
yield version
# Now that we've iterated over everything, determine if we've yielded
# any values, and if we have not and we have any prereleases stored up
# then we will go ahead and yield the prereleases.
if not yielded and found_prereleases:
for version in found_prereleases:
yield version
class LegacySpecifier(_IndividualSpecifier):
_regex_str = r"""
(?P<operator>(==|!=|<=|>=|<|>))
\s*
(?P<version>
[^,;\s)]* # Since this is a "legacy" specifier, and the version
# string can be just about anything, we match everything
# except for whitespace, a semi-colon for marker support,
# a closing paren since versions can be enclosed in
# them, and a comma since it's a version separator.
)
"""
_regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
_operators = {
"==": "equal",
"!=": "not_equal",
"<=": "less_than_equal",
">=": "greater_than_equal",
"<": "less_than",
">": "greater_than",
}
def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None:
super().__init__(spec, prereleases)
warnings.warn(
"Creating a LegacyVersion has been deprecated and will be "
"removed in the next major release",
DeprecationWarning,
)
def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion:
if not isinstance(version, LegacyVersion):
version = LegacyVersion(str(version))
return version
def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool:
return prospective == self._coerce_version(spec)
def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool:
return prospective != self._coerce_version(spec)
def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool:
return prospective <= self._coerce_version(spec)
def _compare_greater_than_equal(
self, prospective: LegacyVersion, spec: str
) -> bool:
return prospective >= self._coerce_version(spec)
def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool:
return prospective < self._coerce_version(spec)
def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool:
return prospective > self._coerce_version(spec)
def _require_version_compare(
fn: Callable[["Specifier", ParsedVersion, str], bool]
) -> Callable[["Specifier", ParsedVersion, str], bool]:
@functools.wraps(fn)
def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool:
if not isinstance(prospective, Version):
return False
return fn(self, prospective, spec)
return wrapped
class Specifier(_IndividualSpecifier):
_regex_str = r"""
(?P<operator>(~=|==|!=|<=|>=|<|>|===))
(?P<version>
(?:
# The identity operators allow for an escape hatch that will
# do an exact string match of the version you wish to install.
# This will not be parsed by PEP 440 and we cannot determine
# any semantic meaning from it. This operator is discouraged
# but included entirely as an escape hatch.
(?<====) # Only match for the identity operator
\s*
[^\s]* # We just match everything, except for whitespace
# since we are only testing for strict identity.
)
|
(?:
# The (non)equality operators allow for wild card and local
# versions to be specified so we have to define these two
# operators separately to enable that.
(?<===|!=) # Only match for equals and not equals
\s*
v?
(?:[0-9]+!)? # epoch
[0-9]+(?:\.[0-9]+)* # release
(?: # pre release
[-_\.]?
(a|b|c|rc|alpha|beta|pre|preview)
[-_\.]?
[0-9]*
)?
(?: # post release
(?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
)?
# You cannot use a wild card and a dev or local version
# together so group them with a | and make them optional.
(?:
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
(?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local
|
\.\* # Wild card syntax of .*
)?
)
|
(?:
# The compatible operator requires at least two digits in the
# release segment.
(?<=~=) # Only match for the compatible operator
\s*
v?
(?:[0-9]+!)? # epoch
[0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *)
(?: # pre release
[-_\.]?
(a|b|c|rc|alpha|beta|pre|preview)
[-_\.]?
[0-9]*
)?
(?: # post release
(?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
)?
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
)
|
(?:
# All other operators only allow a sub set of what the
# (non)equality operators do. Specifically they do not allow
# local versions to be specified nor do they allow the prefix
# matching wild cards.
(?<!==|!=|~=) # We have special cases for these
# operators so we want to make sure they
# don't match here.
\s*
v?
(?:[0-9]+!)? # epoch
[0-9]+(?:\.[0-9]+)* # release
(?: # pre release
[-_\.]?
(a|b|c|rc|alpha|beta|pre|preview)
[-_\.]?
[0-9]*
)?
(?: # post release
(?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
)?
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release
)
)
"""
_regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
_operators = {
"~=": "compatible",
"==": "equal",
"!=": "not_equal",
"<=": "less_than_equal",
">=": "greater_than_equal",
"<": "less_than",
">": "greater_than",
"===": "arbitrary",
}
@_require_version_compare
def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool:
# Compatible releases have an equivalent combination of >= and ==. That
# is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to
# implement this in terms of the other specifiers instead of
# implementing it ourselves. The only thing we need to do is construct
# the other specifiers.
# We want everything but the last item in the version, but we want to
# ignore suffix segments.
prefix = ".".join(
list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1]
)
# Add the prefix notation to the end of our string
prefix += ".*"
return self._get_operator(">=")(prospective, spec) and self._get_operator("==")(
prospective, prefix
)
@_require_version_compare
def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool:
# We need special logic to handle prefix matching
if spec.endswith(".*"):
# In the case of prefix matching we want to ignore local segment.
prospective = Version(prospective.public)
# Split the spec out by dots, and pretend that there is an implicit
# dot in between a release segment and a pre-release segment.
split_spec = _version_split(spec[:-2]) # Remove the trailing .*
# Split the prospective version out by dots, and pretend that there
# is an implicit dot in between a release segment and a pre-release
# segment.
split_prospective = _version_split(str(prospective))
# Shorten the prospective version to be the same length as the spec
# so that we can determine if the specifier is a prefix of the
# prospective version or not.
shortened_prospective = split_prospective[: len(split_spec)]
# Pad out our two sides with zeros so that they both equal the same
# length.
padded_spec, padded_prospective = _pad_version(
split_spec, shortened_prospective
)
return padded_prospective == padded_spec
else:
# Convert our spec string into a Version
spec_version = Version(spec)
# If the specifier does not have a local segment, then we want to
# act as if the prospective version also does not have a local
# segment.
if not spec_version.local:
prospective = Version(prospective.public)
return prospective == spec_version
@_require_version_compare
def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool:
return not self._compare_equal(prospective, spec)
@_require_version_compare
def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool:
# NB: Local version identifiers are NOT permitted in the version
# specifier, so local version labels can be universally removed from
# the prospective version.
return Version(prospective.public) <= Version(spec)
@_require_version_compare
def _compare_greater_than_equal(
self, prospective: ParsedVersion, spec: str
) -> bool:
# NB: Local version identifiers are NOT permitted in the version
# specifier, so local version labels can be universally removed from
# the prospective version.
return Version(prospective.public) >= Version(spec)
@_require_version_compare
def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool:
# Convert our spec to a Version instance, since we'll want to work with
# it as a version.
spec = Version(spec_str)
# Check to see if the prospective version is less than the spec
# version. If it's not we can short circuit and just return False now
# instead of doing extra unneeded work.
if not prospective < spec:
return False
# This special case is here so that, unless the specifier itself
# includes is a pre-release version, that we do not accept pre-release
# versions for the version mentioned in the specifier (e.g. <3.1 should
# not match 3.1.dev0, but should match 3.0.dev0).
if not spec.is_prerelease and prospective.is_prerelease:
if Version(prospective.base_version) == Version(spec.base_version):
return False
# If we've gotten to here, it means that prospective version is both
# less than the spec version *and* it's not a pre-release of the same
# version in the spec.
return True
@_require_version_compare
def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool:
# Convert our spec to a Version instance, since we'll want to work with
# it as a version.
spec = Version(spec_str)
# Check to see if the prospective version is greater than the spec
# version. If it's not we can short circuit and just return False now
# instead of doing extra unneeded work.
if not prospective > spec:
return False
# This special case is here so that, unless the specifier itself
# includes is a post-release version, that we do not accept
# post-release versions for the version mentioned in the specifier
# (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0).
if not spec.is_postrelease and prospective.is_postrelease:
if Version(prospective.base_version) == Version(spec.base_version):
return False
# Ensure that we do not allow a local version of the version mentioned
# in the specifier, which is technically greater than, to match.
if prospective.local is not None:
if Version(prospective.base_version) == Version(spec.base_version):
return False
# If we've gotten to here, it means that prospective version is both
# greater than the spec version *and* it's not a pre-release of the
# same version in the spec.
return True
def _compare_arbitrary(self, prospective: Version, spec: str) -> bool:
return str(prospective).lower() == str(spec).lower()
@property
def prereleases(self) -> bool:
# If there is an explicit prereleases set for this, then we'll just
# blindly use that.
if self._prereleases is not None:
return self._prereleases
# Look at all of our specifiers and determine if they are inclusive
# operators, and if they are if they are including an explicit
# prerelease.
operator, version = self._spec
if operator in ["==", ">=", "<=", "~=", "==="]:
# The == specifier can include a trailing .*, if it does we
# want to remove before parsing.
if operator == "==" and version.endswith(".*"):
version = version[:-2]
# Parse the version, and if it is a pre-release than this
# specifier allows pre-releases.
if parse(version).is_prerelease:
return True
return False
@prereleases.setter
def prereleases(self, value: bool) -> None:
self._prereleases = value
_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$")
def _version_split(version: str) -> List[str]:
result: List[str] = []
for item in version.split("."):
match = _prefix_regex.search(item)
if match:
result.extend(match.groups())
else:
result.append(item)
return result
def _is_not_suffix(segment: str) -> bool:
return not any(
segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post")
)
def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]:
left_split, right_split = [], []
# Get the release segment of our versions
left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left)))
right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right)))
# Get the rest of our versions
left_split.append(left[len(left_split[0]) :])
right_split.append(right[len(right_split[0]) :])
# Insert our padding
left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0])))
right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0])))
return (list(itertools.chain(*left_split)), list(itertools.chain(*right_split)))
class SpecifierSet(BaseSpecifier):
def __init__(
self, specifiers: str = "", prereleases: Optional[bool] = None
) -> None:
# Split on , to break each individual specifier into it's own item, and
# strip each item to remove leading/trailing whitespace.
split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()]
# Parsed each individual specifier, attempting first to make it a
# Specifier and falling back to a LegacySpecifier.
parsed: Set[_IndividualSpecifier] = set()
for specifier in split_specifiers:
try:
parsed.add(Specifier(specifier))
except InvalidSpecifier:
parsed.add(LegacySpecifier(specifier))
# Turn our parsed specifiers into a frozen set and save them for later.
self._specs = frozenset(parsed)
# Store our prereleases value so we can use it later to determine if
# we accept prereleases or not.
self._prereleases = prereleases
def __repr__(self) -> str:
pre = (
f", prereleases={self.prereleases!r}"
if self._prereleases is not None
else ""
)
return f"<SpecifierSet({str(self)!r}{pre})>"
def __str__(self) -> str:
return ",".join(sorted(str(s) for s in self._specs))
def __hash__(self) -> int:
return hash(self._specs)
def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet":
if isinstance(other, str):
other = SpecifierSet(other)
elif not isinstance(other, SpecifierSet):
return NotImplemented
specifier = SpecifierSet()
specifier._specs = frozenset(self._specs | other._specs)
if self._prereleases is None and other._prereleases is not None:
specifier._prereleases = other._prereleases
elif self._prereleases is not None and other._prereleases is None:
specifier._prereleases = self._prereleases
elif self._prereleases == other._prereleases:
specifier._prereleases = self._prereleases
else:
raise ValueError(
"Cannot combine SpecifierSets with True and False prerelease "
"overrides."
)
return specifier
def __eq__(self, other: object) -> bool:
if isinstance(other, (str, _IndividualSpecifier)):
other = SpecifierSet(str(other))
elif not isinstance(other, SpecifierSet):
return NotImplemented
return self._specs == other._specs
def __len__(self) -> int:
return len(self._specs)
def __iter__(self) -> Iterator[_IndividualSpecifier]:
return iter(self._specs)
@property
def prereleases(self) -> Optional[bool]:
# If we have been given an explicit prerelease modifier, then we'll
# pass that through here.
if self._prereleases is not None:
return self._prereleases
# If we don't have any specifiers, and we don't have a forced value,
# then we'll just return None since we don't know if this should have
# pre-releases or not.
if not self._specs:
return None
# Otherwise we'll see if any of the given specifiers accept
# prereleases, if any of them do we'll return True, otherwise False.
return any(s.prereleases for s in self._specs)
@prereleases.setter
def prereleases(self, value: bool) -> None:
self._prereleases = value
def __contains__(self, item: UnparsedVersion) -> bool:
return self.contains(item)
def contains(
self, item: UnparsedVersion, prereleases: Optional[bool] = None
) -> bool:
# Ensure that our item is a Version or LegacyVersion instance.
if not isinstance(item, (LegacyVersion, Version)):
item = parse(item)
# Determine if we're forcing a prerelease or not, if we're not forcing
# one for this particular filter call, then we'll use whatever the
# SpecifierSet thinks for whether or not we should support prereleases.
if prereleases is None:
prereleases = self.prereleases
# We can determine if we're going to allow pre-releases by looking to
# see if any of the underlying items supports them. If none of them do
# and this item is a pre-release then we do not allow it and we can
# short circuit that here.
# Note: This means that 1.0.dev1 would not be contained in something
# like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0
if not prereleases and item.is_prerelease:
return False
# We simply dispatch to the underlying specs here to make sure that the
# given version is contained within all of them.
# Note: This use of all() here means that an empty set of specifiers
# will always return True, this is an explicit design decision.
return all(s.contains(item, prereleases=prereleases) for s in self._specs)
def filter(
self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None
) -> Iterable[VersionTypeVar]:
# Determine if we're forcing a prerelease or not, if we're not forcing
# one for this particular filter call, then we'll use whatever the
# SpecifierSet thinks for whether or not we should support prereleases.
if prereleases is None:
prereleases = self.prereleases
# If we have any specifiers, then we want to wrap our iterable in the
# filter method for each one, this will act as a logical AND amongst
# each specifier.
if self._specs:
for spec in self._specs:
iterable = spec.filter(iterable, prereleases=bool(prereleases))
return iterable
# If we do not have any specifiers, then we need to have a rough filter
# which will filter out any pre-releases, unless there are no final
# releases, and which will filter out LegacyVersion in general.
else:
filtered: List[VersionTypeVar] = []
found_prereleases: List[VersionTypeVar] = []
item: UnparsedVersion
parsed_version: Union[Version, LegacyVersion]
for item in iterable:
# Ensure that we some kind of Version class for this item.
if not isinstance(item, (LegacyVersion, Version)):
parsed_version = parse(item)
else:
parsed_version = item
# Filter out any item which is parsed as a LegacyVersion
if isinstance(parsed_version, LegacyVersion):
continue
# Store any item which is a pre-release for later unless we've
# already found a final version or we are accepting prereleases
if parsed_version.is_prerelease and not prereleases:
if not filtered:
found_prereleases.append(item)
else:
filtered.append(item)
# If we've found no items except for pre-releases, then we'll go
# ahead and use the pre-releases
if not filtered and found_prereleases and prereleases is None:
return found_prereleases
return filtered

View file

@ -0,0 +1,487 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import logging
import platform
import sys
import sysconfig
from importlib.machinery import EXTENSION_SUFFIXES
from typing import (
Dict,
FrozenSet,
Iterable,
Iterator,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from . import _manylinux, _musllinux
logger = logging.getLogger(__name__)
PythonVersion = Sequence[int]
MacVersion = Tuple[int, int]
INTERPRETER_SHORT_NAMES: Dict[str, str] = {
"python": "py", # Generic.
"cpython": "cp",
"pypy": "pp",
"ironpython": "ip",
"jython": "jy",
}
_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32
class Tag:
"""
A representation of the tag triple for a wheel.
Instances are considered immutable and thus are hashable. Equality checking
is also supported.
"""
__slots__ = ["_interpreter", "_abi", "_platform", "_hash"]
def __init__(self, interpreter: str, abi: str, platform: str) -> None:
self._interpreter = interpreter.lower()
self._abi = abi.lower()
self._platform = platform.lower()
# The __hash__ of every single element in a Set[Tag] will be evaluated each time
# that a set calls its `.disjoint()` method, which may be called hundreds of
# times when scanning a page of links for packages with tags matching that
# Set[Tag]. Pre-computing the value here produces significant speedups for
# downstream consumers.
self._hash = hash((self._interpreter, self._abi, self._platform))
@property
def interpreter(self) -> str:
return self._interpreter
@property
def abi(self) -> str:
return self._abi
@property
def platform(self) -> str:
return self._platform
def __eq__(self, other: object) -> bool:
if not isinstance(other, Tag):
return NotImplemented
return (
(self._hash == other._hash) # Short-circuit ASAP for perf reasons.
and (self._platform == other._platform)
and (self._abi == other._abi)
and (self._interpreter == other._interpreter)
)
def __hash__(self) -> int:
return self._hash
def __str__(self) -> str:
return f"{self._interpreter}-{self._abi}-{self._platform}"
def __repr__(self) -> str:
return f"<{self} @ {id(self)}>"
def parse_tag(tag: str) -> FrozenSet[Tag]:
"""
Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances.
Returning a set is required due to the possibility that the tag is a
compressed tag set.
"""
tags = set()
interpreters, abis, platforms = tag.split("-")
for interpreter in interpreters.split("."):
for abi in abis.split("."):
for platform_ in platforms.split("."):
tags.add(Tag(interpreter, abi, platform_))
return frozenset(tags)
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
)
return value
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_")
def _abi3_applies(python_version: PythonVersion) -> bool:
"""
Determine if the Python version supports abi3.
PEP 384 was first implemented in Python 3.2.
"""
return len(python_version) > 1 and tuple(python_version) >= (3, 2)
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]:
py_version = tuple(py_version) # To allow for version comparison.
abis = []
version = _version_nodot(py_version[:2])
debug = pymalloc = ucs4 = ""
with_debug = _get_config_var("Py_DEBUG", warn)
has_refcount = hasattr(sys, "gettotalrefcount")
# Windows doesn't set Py_DEBUG, so checking for support of debug-compiled
# extension modules is the best option.
# https://github.com/pypa/pip/issues/3383#issuecomment-173267692
has_ext = "_d.pyd" in EXTENSION_SUFFIXES
if with_debug or (with_debug is None and (has_refcount or has_ext)):
debug = "d"
if py_version < (3, 8):
with_pymalloc = _get_config_var("WITH_PYMALLOC", warn)
if with_pymalloc or with_pymalloc is None:
pymalloc = "m"
if py_version < (3, 3):
unicode_size = _get_config_var("Py_UNICODE_SIZE", warn)
if unicode_size == 4 or (
unicode_size is None and sys.maxunicode == 0x10FFFF
):
ucs4 = "u"
elif debug:
# Debug builds can also load "normal" extension modules.
# We can also assume no UCS-4 or pymalloc requirement.
abis.append(f"cp{version}")
abis.insert(
0,
"cp{version}{debug}{pymalloc}{ucs4}".format(
version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4
),
)
return abis
def cpython_tags(
python_version: Optional[PythonVersion] = None,
abis: Optional[Iterable[str]] = None,
platforms: Optional[Iterable[str]] = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
"""
Yields the tags for a CPython interpreter.
The tags consist of:
- cp<python_version>-<abi>-<platform>
- cp<python_version>-abi3-<platform>
- cp<python_version>-none-<platform>
- cp<less than python_version>-abi3-<platform> # Older Python versions down to 3.2.
If python_version only specifies a major version then user-provided ABIs and
the 'none' ABItag will be used.
If 'abi3' or 'none' are specified in 'abis' then they will be yielded at
their normal position and not at the beginning.
"""
if not python_version:
python_version = sys.version_info[:2]
interpreter = f"cp{_version_nodot(python_version[:2])}"
if abis is None:
if len(python_version) > 1:
abis = _cpython_abis(python_version, warn)
else:
abis = []
abis = list(abis)
# 'abi3' and 'none' are explicitly handled later.
for explicit_abi in ("abi3", "none"):
try:
abis.remove(explicit_abi)
except ValueError:
pass
platforms = list(platforms or platform_tags())
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
if _abi3_applies(python_version):
yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms)
yield from (Tag(interpreter, "none", platform_) for platform_ in platforms)
if _abi3_applies(python_version):
for minor_version in range(python_version[1] - 1, 1, -1):
for platform_ in platforms:
interpreter = "cp{version}".format(
version=_version_nodot((python_version[0], minor_version))
)
yield Tag(interpreter, "abi3", platform_)
def _generic_abi() -> Iterator[str]:
abi = sysconfig.get_config_var("SOABI")
if abi:
yield _normalize_string(abi)
def generic_tags(
interpreter: Optional[str] = None,
abis: Optional[Iterable[str]] = None,
platforms: Optional[Iterable[str]] = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
"""
Yields the tags for a generic interpreter.
The tags consist of:
- <interpreter>-<abi>-<platform>
The "none" ABI will be added if it was not explicitly provided.
"""
if not interpreter:
interp_name = interpreter_name()
interp_version = interpreter_version(warn=warn)
interpreter = "".join([interp_name, interp_version])
if abis is None:
abis = _generic_abi()
platforms = list(platforms or platform_tags())
abis = list(abis)
if "none" not in abis:
abis.append("none")
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]:
"""
Yields Python versions in descending order.
After the latest version, the major-only version will be yielded, and then
all previous versions of that major version.
"""
if len(py_version) > 1:
yield f"py{_version_nodot(py_version[:2])}"
yield f"py{py_version[0]}"
if len(py_version) > 1:
for minor in range(py_version[1] - 1, -1, -1):
yield f"py{_version_nodot((py_version[0], minor))}"
def compatible_tags(
python_version: Optional[PythonVersion] = None,
interpreter: Optional[str] = None,
platforms: Optional[Iterable[str]] = None,
) -> Iterator[Tag]:
"""
Yields the sequence of tags that are compatible with a specific version of Python.
The tags consist of:
- py*-none-<platform>
- <interpreter>-none-any # ... if `interpreter` is provided.
- py*-none-any
"""
if not python_version:
python_version = sys.version_info[:2]
platforms = list(platforms or platform_tags())
for version in _py_interpreter_range(python_version):
for platform_ in platforms:
yield Tag(version, "none", platform_)
if interpreter:
yield Tag(interpreter, "none", "any")
for version in _py_interpreter_range(python_version):
yield Tag(version, "none", "any")
def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str:
if not is_32bit:
return arch
if arch.startswith("ppc"):
return "ppc"
return "i386"
def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]:
formats = [cpu_arch]
if cpu_arch == "x86_64":
if version < (10, 4):
return []
formats.extend(["intel", "fat64", "fat32"])
elif cpu_arch == "i386":
if version < (10, 4):
return []
formats.extend(["intel", "fat32", "fat"])
elif cpu_arch == "ppc64":
# TODO: Need to care about 32-bit PPC for ppc64 through 10.2?
if version > (10, 5) or version < (10, 4):
return []
formats.append("fat64")
elif cpu_arch == "ppc":
if version > (10, 6):
return []
formats.extend(["fat32", "fat"])
if cpu_arch in {"arm64", "x86_64"}:
formats.append("universal2")
if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}:
formats.append("universal")
return formats
def mac_platforms(
version: Optional[MacVersion] = None, arch: Optional[str] = None
) -> Iterator[str]:
"""
Yields the platform tags for a macOS system.
The `version` parameter is a two-item tuple specifying the macOS version to
generate platform tags for. The `arch` parameter is the CPU architecture to
generate platform tags for. Both parameters default to the appropriate value
for the current system.
"""
version_str, _, cpu_arch = platform.mac_ver()
if version is None:
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
else:
version = version
if arch is None:
arch = _mac_arch(cpu_arch)
else:
arch = arch
if (10, 0) <= version and version < (11, 0):
# Prior to Mac OS 11, each yearly release of Mac OS bumped the
# "minor" version number. The major version was always 10.
for minor_version in range(version[1], -1, -1):
compat_version = 10, minor_version
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=10, minor=minor_version, binary_format=binary_format
)
if version >= (11, 0):
# Starting with Mac OS 11, each yearly release bumps the major version
# number. The minor versions are now the midyear updates.
for major_version in range(version[0], 10, -1):
compat_version = major_version, 0
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=major_version, minor=0, binary_format=binary_format
)
if version >= (11, 0):
# Mac OS 11 on x86_64 is compatible with binaries from previous releases.
# Arm64 support was introduced in 11.0, so no Arm binaries from previous
# releases exist.
#
# However, the "universal2" binary format can have a
# macOS version earlier than 11.0 when the x86_64 part of the binary supports
# that version of macOS.
if arch == "x86_64":
for minor_version in range(16, 3, -1):
compat_version = 10, minor_version
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=compat_version[0],
minor=compat_version[1],
binary_format=binary_format,
)
else:
for minor_version in range(16, 3, -1):
compat_version = 10, minor_version
binary_format = "universal2"
yield "macosx_{major}_{minor}_{binary_format}".format(
major=compat_version[0],
minor=compat_version[1],
binary_format=binary_format,
)
def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]:
linux = _normalize_string(sysconfig.get_platform())
if is_32bit:
if linux == "linux_x86_64":
linux = "linux_i686"
elif linux == "linux_aarch64":
linux = "linux_armv7l"
_, arch = linux.split("_", 1)
yield from _manylinux.platform_tags(linux, arch)
yield from _musllinux.platform_tags(arch)
yield linux
def _generic_platforms() -> Iterator[str]:
yield _normalize_string(sysconfig.get_platform())
def platform_tags() -> Iterator[str]:
"""
Provides the platform tags for this installation.
"""
if platform.system() == "Darwin":
return mac_platforms()
elif platform.system() == "Linux":
return _linux_platforms()
else:
return _generic_platforms()
def interpreter_name() -> str:
"""
Returns the name of the running interpreter.
"""
name = sys.implementation.name
return INTERPRETER_SHORT_NAMES.get(name) or name
def interpreter_version(*, warn: bool = False) -> str:
"""
Returns the version of the running interpreter.
"""
version = _get_config_var("py_version_nodot", warn=warn)
if version:
version = str(version)
else:
version = _version_nodot(sys.version_info[:2])
return version
def _version_nodot(version: PythonVersion) -> str:
return "".join(map(str, version))
def sys_tags(*, warn: bool = False) -> Iterator[Tag]:
"""
Returns the sequence of tag triples for the running interpreter.
The order of the sequence corresponds to priority order for the
interpreter, from most to least important.
"""
interp_name = interpreter_name()
if interp_name == "cp":
yield from cpython_tags(warn=warn)
else:
yield from generic_tags()
if interp_name == "pp":
yield from compatible_tags(interpreter="pp3")
else:
yield from compatible_tags()

View file

@ -0,0 +1,136 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import re
from typing import FrozenSet, NewType, Tuple, Union, cast
from .tags import Tag, parse_tag
from .version import InvalidVersion, Version
BuildTag = Union[Tuple[()], Tuple[int, str]]
NormalizedName = NewType("NormalizedName", str)
class InvalidWheelFilename(ValueError):
"""
An invalid wheel filename was found, users should refer to PEP 427.
"""
class InvalidSdistFilename(ValueError):
"""
An invalid sdist filename was found, users should refer to the packaging user guide.
"""
_canonicalize_regex = re.compile(r"[-_.]+")
# PEP 427: The build number must start with a digit.
_build_tag_regex = re.compile(r"(\d+)(.*)")
def canonicalize_name(name: str) -> NormalizedName:
# This is taken from PEP 503.
value = _canonicalize_regex.sub("-", name).lower()
return cast(NormalizedName, value)
def canonicalize_version(version: Union[Version, str]) -> str:
"""
This is very similar to Version.__str__, but has one subtle difference
with the way it handles the release segment.
"""
if isinstance(version, str):
try:
parsed = Version(version)
except InvalidVersion:
# Legacy versions cannot be normalized
return version
else:
parsed = version
parts = []
# Epoch
if parsed.epoch != 0:
parts.append(f"{parsed.epoch}!")
# Release segment
# NB: This strips trailing '.0's to normalize
parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release)))
# Pre-release
if parsed.pre is not None:
parts.append("".join(str(x) for x in parsed.pre))
# Post-release
if parsed.post is not None:
parts.append(f".post{parsed.post}")
# Development release
if parsed.dev is not None:
parts.append(f".dev{parsed.dev}")
# Local version segment
if parsed.local is not None:
parts.append(f"+{parsed.local}")
return "".join(parts)
def parse_wheel_filename(
filename: str,
) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]:
if not filename.endswith(".whl"):
raise InvalidWheelFilename(
f"Invalid wheel filename (extension must be '.whl'): {filename}"
)
filename = filename[:-4]
dashes = filename.count("-")
if dashes not in (4, 5):
raise InvalidWheelFilename(
f"Invalid wheel filename (wrong number of parts): {filename}"
)
parts = filename.split("-", dashes - 2)
name_part = parts[0]
# See PEP 427 for the rules on escaping the project name
if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None:
raise InvalidWheelFilename(f"Invalid project name: {filename}")
name = canonicalize_name(name_part)
version = Version(parts[1])
if dashes == 5:
build_part = parts[2]
build_match = _build_tag_regex.match(build_part)
if build_match is None:
raise InvalidWheelFilename(
f"Invalid build number: {build_part} in '{filename}'"
)
build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2)))
else:
build = ()
tags = parse_tag(parts[-1])
return (name, version, build, tags)
def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]:
if filename.endswith(".tar.gz"):
file_stem = filename[: -len(".tar.gz")]
elif filename.endswith(".zip"):
file_stem = filename[: -len(".zip")]
else:
raise InvalidSdistFilename(
f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):"
f" {filename}"
)
# We are requiring a PEP 440 version, which cannot contain dashes,
# so we split on the last dash.
name_part, sep, version_part = file_stem.rpartition("-")
if not sep:
raise InvalidSdistFilename(f"Invalid sdist filename: {filename}")
name = canonicalize_name(name_part)
version = Version(version_part)
return (name, version)

View file

@ -0,0 +1,504 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import collections
import itertools
import re
import warnings
from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"]
InfiniteTypes = Union[InfinityType, NegativeInfinityType]
PrePostDevType = Union[InfiniteTypes, Tuple[str, int]]
SubLocalType = Union[InfiniteTypes, int, str]
LocalType = Union[
NegativeInfinityType,
Tuple[
Union[
SubLocalType,
Tuple[SubLocalType, str],
Tuple[NegativeInfinityType, SubLocalType],
],
...,
],
]
CmpKey = Tuple[
int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType
]
LegacyCmpKey = Tuple[int, Tuple[str, ...]]
VersionComparisonMethod = Callable[
[Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool
]
_Version = collections.namedtuple(
"_Version", ["epoch", "release", "dev", "pre", "post", "local"]
)
def parse(version: str) -> Union["LegacyVersion", "Version"]:
"""
Parse the given version string and return either a :class:`Version` object
or a :class:`LegacyVersion` object depending on if the given version is
a valid PEP 440 version or a legacy version.
"""
try:
return Version(version)
except InvalidVersion:
return LegacyVersion(version)
class InvalidVersion(ValueError):
"""
An invalid version was found, users should refer to PEP 440.
"""
class _BaseVersion:
_key: Union[CmpKey, LegacyCmpKey]
def __hash__(self) -> int:
return hash(self._key)
# Please keep the duplicated `isinstance` check
# in the six comparisons hereunder
# unless you find a way to avoid adding overhead function calls.
def __lt__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key < other._key
def __le__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key <= other._key
def __eq__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key == other._key
def __ge__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key >= other._key
def __gt__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key > other._key
def __ne__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key != other._key
class LegacyVersion(_BaseVersion):
def __init__(self, version: str) -> None:
self._version = str(version)
self._key = _legacy_cmpkey(self._version)
warnings.warn(
"Creating a LegacyVersion has been deprecated and will be "
"removed in the next major release",
DeprecationWarning,
)
def __str__(self) -> str:
return self._version
def __repr__(self) -> str:
return f"<LegacyVersion('{self}')>"
@property
def public(self) -> str:
return self._version
@property
def base_version(self) -> str:
return self._version
@property
def epoch(self) -> int:
return -1
@property
def release(self) -> None:
return None
@property
def pre(self) -> None:
return None
@property
def post(self) -> None:
return None
@property
def dev(self) -> None:
return None
@property
def local(self) -> None:
return None
@property
def is_prerelease(self) -> bool:
return False
@property
def is_postrelease(self) -> bool:
return False
@property
def is_devrelease(self) -> bool:
return False
_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE)
_legacy_version_replacement_map = {
"pre": "c",
"preview": "c",
"-": "final-",
"rc": "c",
"dev": "@",
}
def _parse_version_parts(s: str) -> Iterator[str]:
for part in _legacy_version_component_re.split(s):
part = _legacy_version_replacement_map.get(part, part)
if not part or part == ".":
continue
if part[:1] in "0123456789":
# pad for numeric comparison
yield part.zfill(8)
else:
yield "*" + part
# ensure that alpha/beta/candidate are before final
yield "*final"
def _legacy_cmpkey(version: str) -> LegacyCmpKey:
# We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch
# greater than or equal to 0. This will effectively put the LegacyVersion,
# which uses the defacto standard originally implemented by setuptools,
# as before all PEP 440 versions.
epoch = -1
# This scheme is taken from pkg_resources.parse_version setuptools prior to
# it's adoption of the packaging library.
parts: List[str] = []
for part in _parse_version_parts(version.lower()):
if part.startswith("*"):
# remove "-" before a prerelease tag
if part < "*final":
while parts and parts[-1] == "*final-":
parts.pop()
# remove trailing zeros from each series of numeric parts
while parts and parts[-1] == "00000000":
parts.pop()
parts.append(part)
return epoch, tuple(parts)
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
(?P<post> # post release
(?:-(?P<post_n1>[0-9]+))
|
(?:
[-_\.]?
(?P<post_l>post|rev|r)
[-_\.]?
(?P<post_n2>[0-9]+)?
)
)?
(?P<dev> # dev release
[-_\.]?
(?P<dev_l>dev)
[-_\.]?
(?P<dev_n>[0-9]+)?
)?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
class Version(_BaseVersion):
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
def __init__(self, version: str) -> None:
# Validate the version and parse it into pieces
match = self._regex.search(version)
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")
# Store the parsed out pieces of the version
self._version = _Version(
epoch=int(match.group("epoch")) if match.group("epoch") else 0,
release=tuple(int(i) for i in match.group("release").split(".")),
pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
post=_parse_letter_version(
match.group("post_l"), match.group("post_n1") or match.group("post_n2")
),
dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
local=_parse_local_version(match.group("local")),
)
# Generate a key which will be used for sorting
self._key = _cmpkey(
self._version.epoch,
self._version.release,
self._version.pre,
self._version.post,
self._version.dev,
self._version.local,
)
def __repr__(self) -> str:
return f"<Version('{self}')>"
def __str__(self) -> str:
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
# Pre-release
if self.pre is not None:
parts.append("".join(str(x) for x in self.pre))
# Post-release
if self.post is not None:
parts.append(f".post{self.post}")
# Development release
if self.dev is not None:
parts.append(f".dev{self.dev}")
# Local version segment
if self.local is not None:
parts.append(f"+{self.local}")
return "".join(parts)
@property
def epoch(self) -> int:
_epoch: int = self._version.epoch
return _epoch
@property
def release(self) -> Tuple[int, ...]:
_release: Tuple[int, ...] = self._version.release
return _release
@property
def pre(self) -> Optional[Tuple[str, int]]:
_pre: Optional[Tuple[str, int]] = self._version.pre
return _pre
@property
def post(self) -> Optional[int]:
return self._version.post[1] if self._version.post else None
@property
def dev(self) -> Optional[int]:
return self._version.dev[1] if self._version.dev else None
@property
def local(self) -> Optional[str]:
if self._version.local:
return ".".join(str(x) for x in self._version.local)
else:
return None
@property
def public(self) -> str:
return str(self).split("+", 1)[0]
@property
def base_version(self) -> str:
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
return "".join(parts)
@property
def is_prerelease(self) -> bool:
return self.dev is not None or self.pre is not None
@property
def is_postrelease(self) -> bool:
return self.post is not None
@property
def is_devrelease(self) -> bool:
return self.dev is not None
@property
def major(self) -> int:
return self.release[0] if len(self.release) >= 1 else 0
@property
def minor(self) -> int:
return self.release[1] if len(self.release) >= 2 else 0
@property
def micro(self) -> int:
return self.release[2] if len(self.release) >= 3 else 0
def _parse_letter_version(
letter: str, number: Union[str, bytes, SupportsInt]
) -> Optional[Tuple[str, int]]:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
if number is None:
number = 0
# We normalize any letters to their lower case form
letter = letter.lower()
# We consider some words to be alternate spellings of other words and
# in those cases we want to normalize the spellings to our preferred
# spelling.
if letter == "alpha":
letter = "a"
elif letter == "beta":
letter = "b"
elif letter in ["c", "pre", "preview"]:
letter = "rc"
elif letter in ["rev", "r"]:
letter = "post"
return letter, int(number)
if not letter and number:
# We assume if we are given a number, but we are not given a letter
# then this is using the implicit post release syntax (e.g. 1.0-1)
letter = "post"
return letter, int(number)
return None
_local_version_separators = re.compile(r"[\._-]")
def _parse_local_version(local: str) -> Optional[LocalType]:
"""
Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
"""
if local is not None:
return tuple(
part.lower() if not part.isdigit() else int(part)
for part in _local_version_separators.split(local)
)
return None
def _cmpkey(
epoch: int,
release: Tuple[int, ...],
pre: Optional[Tuple[str, int]],
post: Optional[Tuple[str, int]],
dev: Optional[Tuple[str, int]],
local: Optional[Tuple[SubLocalType]],
) -> CmpKey:
# When we compare a release version, we want to compare it with all of the
# trailing zeros removed. So we'll use a reverse the list, drop all the now
# leading zeros until we come to something non zero, then take the rest
# re-reverse it back into the correct order and make it a tuple and use
# that for our sorting key.
_release = tuple(
reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
)
# We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
# We'll do this by abusing the pre segment, but we _only_ want to do this
# if there is not a pre or a post segment. If we have one of those then
# the normal sorting rules will handle this case correctly.
if pre is None and post is None and dev is not None:
_pre: PrePostDevType = NegativeInfinity
# Versions without a pre-release (except as noted above) should sort after
# those with one.
elif pre is None:
_pre = Infinity
else:
_pre = pre
# Versions without a post segment should sort before those with one.
if post is None:
_post: PrePostDevType = NegativeInfinity
else:
_post = post
# Versions without a development segment should sort after those with one.
if dev is None:
_dev: PrePostDevType = Infinity
else:
_dev = dev
if local is None:
# Versions without a local segment should sort before those with one.
_local: LocalType = NegativeInfinity
else:
# Versions with a local segment need that segment parsed to implement
# the sorting rules in PEP440.
# - Alpha numeric segments sort before numeric segments
# - Alpha numeric segments sort lexicographically
# - Numeric segments sort numerically
# - Shorter versions sort before longer versions when the prefixes
# match exactly
_local = tuple(
(i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
)
return epoch, _release, _pre, _post, _dev, _local

View file

@ -0,0 +1,331 @@
# module pyparsing.py
#
# Copyright (c) 2003-2022 Paul T. McGuire
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
__doc__ = """
pyparsing module - Classes and methods to define and execute parsing grammars
=============================================================================
The pyparsing module is an alternative approach to creating and
executing simple grammars, vs. the traditional lex/yacc approach, or the
use of regular expressions. With pyparsing, you don't need to learn
a new syntax for defining grammars or matching expressions - the parsing
module provides a library of classes that you use to construct the
grammar directly in Python.
Here is a program to parse "Hello, World!" (or any greeting of the form
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
:class:`Literal`, and :class:`And` elements
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
and the strings are auto-converted to :class:`Literal` expressions)::
from pyparsing import Word, alphas
# define grammar of a greeting
greet = Word(alphas) + "," + Word(alphas) + "!"
hello = "Hello, World!"
print(hello, "->", greet.parse_string(hello))
The program outputs the following::
Hello, World! -> ['Hello', ',', 'World', '!']
The Python representation of the grammar is quite readable, owing to the
self-explanatory class names, and the use of :class:`'+'<And>`,
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
The :class:`ParseResults` object returned from
:class:`ParserElement.parseString` can be
accessed as a nested list, a dictionary, or an object with named
attributes.
The pyparsing module handles some of the problems that are typically
vexing when writing text parsers:
- extra or missing whitespace (the above program will also handle
"Hello,World!", "Hello , World !", etc.)
- quoted strings
- embedded comments
Getting Started -
-----------------
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
see the base classes that most other pyparsing
classes inherit from. Use the docstrings for examples of how to:
- construct literal match expressions from :class:`Literal` and
:class:`CaselessLiteral` classes
- construct character word-group expressions using the :class:`Word`
class
- see how to create repetitive expressions using :class:`ZeroOrMore`
and :class:`OneOrMore` classes
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
and :class:`'&'<Each>` operators to combine simple expressions into
more complex ones
- associate names with your parsed results using
:class:`ParserElement.setResultsName`
- access the parsed data, which is returned as a :class:`ParseResults`
object
- find some helpful expression short-cuts like :class:`delimitedList`
and :class:`oneOf`
- find more useful common expressions in the :class:`pyparsing_common`
namespace class
"""
from typing import NamedTuple
class version_info(NamedTuple):
major: int
minor: int
micro: int
releaselevel: str
serial: int
@property
def __version__(self):
return (
"{}.{}.{}".format(self.major, self.minor, self.micro)
+ (
"{}{}{}".format(
"r" if self.releaselevel[0] == "c" else "",
self.releaselevel[0],
self.serial,
),
"",
)[self.releaselevel == "final"]
)
def __str__(self):
return "{} {} / {}".format(__name__, self.__version__, __version_time__)
def __repr__(self):
return "{}.{}({})".format(
__name__,
type(self).__name__,
", ".join("{}={!r}".format(*nv) for nv in zip(self._fields, self)),
)
__version_info__ = version_info(3, 0, 9, "final", 0)
__version_time__ = "05 May 2022 07:02 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
from .util import *
from .exceptions import *
from .actions import *
from .core import __diag__, __compat__
from .results import *
from .core import *
from .core import _builtin_exprs as core_builtin_exprs
from .helpers import *
from .helpers import _builtin_exprs as helper_builtin_exprs
from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
from .testing import pyparsing_test as testing
from .common import (
pyparsing_common as common,
_builtin_exprs as common_builtin_exprs,
)
# define backward compat synonyms
if "pyparsing_unicode" not in globals():
pyparsing_unicode = unicode
if "pyparsing_common" not in globals():
pyparsing_common = common
if "pyparsing_test" not in globals():
pyparsing_test = testing
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
__all__ = [
"__version__",
"__version_time__",
"__author__",
"__compat__",
"__diag__",
"And",
"AtLineStart",
"AtStringStart",
"CaselessKeyword",
"CaselessLiteral",
"CharsNotIn",
"Combine",
"Dict",
"Each",
"Empty",
"FollowedBy",
"Forward",
"GoToColumn",
"Group",
"IndentedBlock",
"Keyword",
"LineEnd",
"LineStart",
"Literal",
"Located",
"PrecededBy",
"MatchFirst",
"NoMatch",
"NotAny",
"OneOrMore",
"OnlyOnce",
"OpAssoc",
"Opt",
"Optional",
"Or",
"ParseBaseException",
"ParseElementEnhance",
"ParseException",
"ParseExpression",
"ParseFatalException",
"ParseResults",
"ParseSyntaxException",
"ParserElement",
"PositionToken",
"QuotedString",
"RecursiveGrammarException",
"Regex",
"SkipTo",
"StringEnd",
"StringStart",
"Suppress",
"Token",
"TokenConverter",
"White",
"Word",
"WordEnd",
"WordStart",
"ZeroOrMore",
"Char",
"alphanums",
"alphas",
"alphas8bit",
"any_close_tag",
"any_open_tag",
"c_style_comment",
"col",
"common_html_entity",
"counted_array",
"cpp_style_comment",
"dbl_quoted_string",
"dbl_slash_comment",
"delimited_list",
"dict_of",
"empty",
"hexnums",
"html_comment",
"identchars",
"identbodychars",
"java_style_comment",
"line",
"line_end",
"line_start",
"lineno",
"make_html_tags",
"make_xml_tags",
"match_only_at_col",
"match_previous_expr",
"match_previous_literal",
"nested_expr",
"null_debug_action",
"nums",
"one_of",
"printables",
"punc8bit",
"python_style_comment",
"quoted_string",
"remove_quotes",
"replace_with",
"replace_html_entity",
"rest_of_line",
"sgl_quoted_string",
"srange",
"string_end",
"string_start",
"trace_parse_action",
"unicode_string",
"with_attribute",
"indentedBlock",
"original_text_for",
"ungroup",
"infix_notation",
"locatedExpr",
"with_class",
"CloseMatch",
"token_map",
"pyparsing_common",
"pyparsing_unicode",
"unicode_set",
"condition_as_parse_action",
"pyparsing_test",
# pre-PEP8 compatibility names
"__versionTime__",
"anyCloseTag",
"anyOpenTag",
"cStyleComment",
"commonHTMLEntity",
"countedArray",
"cppStyleComment",
"dblQuotedString",
"dblSlashComment",
"delimitedList",
"dictOf",
"htmlComment",
"javaStyleComment",
"lineEnd",
"lineStart",
"makeHTMLTags",
"makeXMLTags",
"matchOnlyAtCol",
"matchPreviousExpr",
"matchPreviousLiteral",
"nestedExpr",
"nullDebugAction",
"oneOf",
"opAssoc",
"pythonStyleComment",
"quotedString",
"removeQuotes",
"replaceHTMLEntity",
"replaceWith",
"restOfLine",
"sglQuotedString",
"stringEnd",
"stringStart",
"traceParseAction",
"unicodeString",
"withAttribute",
"indentedBlock",
"originalTextFor",
"infixNotation",
"locatedExpr",
"withClass",
"tokenMap",
"conditionAsParseAction",
"autoname_elements",
]

View file

@ -0,0 +1,207 @@
# actions.py
from .exceptions import ParseException
from .util import col
class OnlyOnce:
"""
Wrapper for parse actions, to ensure they are only called once.
"""
def __init__(self, method_call):
from .core import _trim_arity
self.callable = _trim_arity(method_call)
self.called = False
def __call__(self, s, l, t):
if not self.called:
results = self.callable(s, l, t)
self.called = True
return results
raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
def reset(self):
"""
Allow the associated parse action to be called once more.
"""
self.called = False
def match_only_at_col(n):
"""
Helper method for defining parse actions that require matching at
a specific column in the input text.
"""
def verify_col(strg, locn, toks):
if col(locn, strg) != n:
raise ParseException(strg, locn, "matched token not at column {}".format(n))
return verify_col
def replace_with(repl_str):
"""
Helper method for common parse actions that simply return
a literal value. Especially useful when used with
:class:`transform_string<ParserElement.transform_string>` ().
Example::
num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
term = na | num
term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
"""
return lambda s, l, t: [repl_str]
def remove_quotes(s, l, t):
"""
Helper parse action for removing quotation marks from parsed
quoted strings.
Example::
# by default, quotation marks are included in parsed results
quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
# use remove_quotes to strip quotation marks from parsed results
quoted_string.set_parse_action(remove_quotes)
quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
"""
return t[0][1:-1]
def with_attribute(*args, **attr_dict):
"""
Helper to create a validating parse action to be used with start
tags created with :class:`make_xml_tags` or
:class:`make_html_tags`. Use ``with_attribute`` to qualify
a starting tag with a required attribute value, to avoid false
matches on common tags such as ``<TD>`` or ``<DIV>``.
Call ``with_attribute`` with a series of attribute names and
values. Specify the list of filter attributes names and values as:
- keyword arguments, as in ``(align="right")``, or
- as an explicit dict with ``**`` operator, when an attribute
name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
- a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
For attribute names with a namespace prefix, you must use the second
form. Attribute names are matched insensitive to upper/lower case.
If just testing for ``class`` (with or without a namespace), use
:class:`with_class`.
To verify that the attribute exists, but without specifying a value,
pass ``with_attribute.ANY_VALUE`` as the value.
Example::
html = '''
<div>
Some text
<div type="grid">1 4 0 1 0</div>
<div type="graph">1,3 2,3 1,1</div>
<div>this has no type</div>
</div>
'''
div,div_end = make_html_tags("div")
# only match div tag having a type attribute with value "grid"
div_grid = div().set_parse_action(with_attribute(type="grid"))
grid_expr = div_grid + SkipTo(div | div_end)("body")
for grid_header in grid_expr.search_string(html):
print(grid_header.body)
# construct a match with any div tag having a type attribute, regardless of the value
div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
div_expr = div_any_type + SkipTo(div | div_end)("body")
for div_header in div_expr.search_string(html):
print(div_header.body)
prints::
1 4 0 1 0
1 4 0 1 0
1,3 2,3 1,1
"""
if args:
attrs = args[:]
else:
attrs = attr_dict.items()
attrs = [(k, v) for k, v in attrs]
def pa(s, l, tokens):
for attrName, attrValue in attrs:
if attrName not in tokens:
raise ParseException(s, l, "no matching attribute " + attrName)
if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue:
raise ParseException(
s,
l,
"attribute {!r} has value {!r}, must be {!r}".format(
attrName, tokens[attrName], attrValue
),
)
return pa
with_attribute.ANY_VALUE = object()
def with_class(classname, namespace=""):
"""
Simplified version of :class:`with_attribute` when
matching on a div class - made difficult because ``class`` is
a reserved word in Python.
Example::
html = '''
<div>
Some text
<div class="grid">1 4 0 1 0</div>
<div class="graph">1,3 2,3 1,1</div>
<div>this &lt;div&gt; has no class</div>
</div>
'''
div,div_end = make_html_tags("div")
div_grid = div().set_parse_action(with_class("grid"))
grid_expr = div_grid + SkipTo(div | div_end)("body")
for grid_header in grid_expr.search_string(html):
print(grid_header.body)
div_any_type = div().set_parse_action(with_class(withAttribute.ANY_VALUE))
div_expr = div_any_type + SkipTo(div | div_end)("body")
for div_header in div_expr.search_string(html):
print(div_header.body)
prints::
1 4 0 1 0
1 4 0 1 0
1,3 2,3 1,1
"""
classattr = "{}:class".format(namespace) if namespace else "class"
return with_attribute(**{classattr: classname})
# pre-PEP8 compatibility symbols
replaceWith = replace_with
removeQuotes = remove_quotes
withAttribute = with_attribute
withClass = with_class
matchOnlyAtCol = match_only_at_col

View file

@ -0,0 +1,424 @@
# common.py
from .core import *
from .helpers import delimited_list, any_open_tag, any_close_tag
from datetime import datetime
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
"""Here are some common low-level expressions that may be useful in
jump-starting parser development:
- numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
:class:`scientific notation<sci_real>`)
- common :class:`programming identifiers<identifier>`
- network addresses (:class:`MAC<mac_address>`,
:class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
- ISO8601 :class:`dates<iso8601_date>` and
:class:`datetime<iso8601_datetime>`
- :class:`UUID<uuid>`
- :class:`comma-separated list<comma_separated_list>`
- :class:`url`
Parse actions:
- :class:`convertToInteger`
- :class:`convertToFloat`
- :class:`convertToDate`
- :class:`convertToDatetime`
- :class:`stripHTMLTags`
- :class:`upcaseTokens`
- :class:`downcaseTokens`
Example::
pyparsing_common.number.runTests('''
# any int or real number, returned as the appropriate type
100
-100
+100
3.14159
6.02e23
1e-12
''')
pyparsing_common.fnumber.runTests('''
# any int or real number, returned as float
100
-100
+100
3.14159
6.02e23
1e-12
''')
pyparsing_common.hex_integer.runTests('''
# hex numbers
100
FF
''')
pyparsing_common.fraction.runTests('''
# fractions
1/2
-3/4
''')
pyparsing_common.mixed_integer.runTests('''
# mixed fractions
1
1/2
-3/4
1-3/4
''')
import uuid
pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
pyparsing_common.uuid.runTests('''
# uuid
12345678-1234-5678-1234-567812345678
''')
prints::
# any int or real number, returned as the appropriate type
100
[100]
-100
[-100]
+100
[100]
3.14159
[3.14159]
6.02e23
[6.02e+23]
1e-12
[1e-12]
# any int or real number, returned as float
100
[100.0]
-100
[-100.0]
+100
[100.0]
3.14159
[3.14159]
6.02e23
[6.02e+23]
1e-12
[1e-12]
# hex numbers
100
[256]
FF
[255]
# fractions
1/2
[0.5]
-3/4
[-0.75]
# mixed fractions
1
[1]
1/2
[0.5]
-3/4
[-0.75]
1-3/4
[1.75]
# uuid
12345678-1234-5678-1234-567812345678
[UUID('12345678-1234-5678-1234-567812345678')]
"""
convert_to_integer = token_map(int)
"""
Parse action for converting parsed integers to Python int
"""
convert_to_float = token_map(float)
"""
Parse action for converting parsed numbers to Python float
"""
integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
"""expression that parses an unsigned integer, returns an int"""
hex_integer = (
Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
)
"""expression that parses a hexadecimal integer, returns an int"""
signed_integer = (
Regex(r"[+-]?\d+")
.set_name("signed integer")
.set_parse_action(convert_to_integer)
)
"""expression that parses an integer with optional leading sign, returns an int"""
fraction = (
signed_integer().set_parse_action(convert_to_float)
+ "/"
+ signed_integer().set_parse_action(convert_to_float)
).set_name("fraction")
"""fractional expression of an integer divided by an integer, returns a float"""
fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
mixed_integer = (
fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
).set_name("fraction or mixed integer-fraction")
"""mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
mixed_integer.add_parse_action(sum)
real = (
Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
.set_name("real number")
.set_parse_action(convert_to_float)
)
"""expression that parses a floating point number and returns a float"""
sci_real = (
Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
.set_name("real number with scientific notation")
.set_parse_action(convert_to_float)
)
"""expression that parses a floating point number with optional
scientific notation and returns a float"""
# streamlining this expression makes the docs nicer-looking
number = (sci_real | real | signed_integer).setName("number").streamline()
"""any numeric expression, returns the corresponding Python type"""
fnumber = (
Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
.set_name("fnumber")
.set_parse_action(convert_to_float)
)
"""any int or real number, returned as float"""
identifier = Word(identchars, identbodychars).set_name("identifier")
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
ipv4_address = Regex(
r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
).set_name("IPv4 address")
"IPv4 address (``0.0.0.0 - 255.255.255.255``)"
_ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
_full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
"full IPv6 address"
)
_short_ipv6_address = (
Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
+ "::"
+ Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
).set_name("short IPv6 address")
_short_ipv6_address.add_condition(
lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
)
_mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
ipv6_address = Combine(
(_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
"IPv6 address"
)
).set_name("IPv6 address")
"IPv6 address (long, short, or mixed form)"
mac_address = Regex(
r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
).set_name("MAC address")
"MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
@staticmethod
def convert_to_date(fmt: str = "%Y-%m-%d"):
"""
Helper to create a parse action for converting parsed date string to Python datetime.date
Params -
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
Example::
date_expr = pyparsing_common.iso8601_date.copy()
date_expr.setParseAction(pyparsing_common.convertToDate())
print(date_expr.parseString("1999-12-31"))
prints::
[datetime.date(1999, 12, 31)]
"""
def cvt_fn(ss, ll, tt):
try:
return datetime.strptime(tt[0], fmt).date()
except ValueError as ve:
raise ParseException(ss, ll, str(ve))
return cvt_fn
@staticmethod
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
"""Helper to create a parse action for converting parsed
datetime string to Python datetime.datetime
Params -
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
Example::
dt_expr = pyparsing_common.iso8601_datetime.copy()
dt_expr.setParseAction(pyparsing_common.convertToDatetime())
print(dt_expr.parseString("1999-12-31T23:59:59.999"))
prints::
[datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
"""
def cvt_fn(s, l, t):
try:
return datetime.strptime(t[0], fmt)
except ValueError as ve:
raise ParseException(s, l, str(ve))
return cvt_fn
iso8601_date = Regex(
r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
).set_name("ISO8601 date")
"ISO8601 date (``yyyy-mm-dd``)"
iso8601_datetime = Regex(
r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
).set_name("ISO8601 datetime")
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
@staticmethod
def strip_html_tags(s: str, l: int, tokens: ParseResults):
"""Parse action to remove HTML tags from web page HTML source
Example::
# strip HTML links from normal text
text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
td, td_end = makeHTMLTags("TD")
table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
print(table_text.parseString(text).body)
Prints::
More info at the pyparsing wiki page
"""
return pyparsing_common._html_stripper.transform_string(tokens[0])
_commasepitem = (
Combine(
OneOrMore(
~Literal(",")
+ ~LineEnd()
+ Word(printables, exclude_chars=",")
+ Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
)
)
.streamline()
.set_name("commaItem")
)
comma_separated_list = delimited_list(
Opt(quoted_string.copy() | _commasepitem, default="")
).set_name("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
"""Parse action to convert tokens to upper case."""
downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
"""Parse action to convert tokens to lower case."""
# fmt: off
url = Regex(
# https://mathiasbynens.be/demo/url-regex
# https://gist.github.com/dperini/729294
r"^" +
# protocol identifier (optional)
# short syntax // still required
r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
# user:pass BasicAuth (optional)
r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
r"(?P<host>" +
# IP address exclusion
# private & local networks
r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
# IP address dotted notation octets
# excludes loopback network 0.0.0.0
# excludes reserved space >= 224.0.0.0
# excludes network & broadcast addresses
# (first & last IP address of each class)
r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
r"|" +
# host & domain names, may end with dot
# can be replaced by a shortest alternative
# (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
r"(?:" +
r"(?:" +
r"[a-z0-9\u00a1-\uffff]" +
r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
r")?" +
r"[a-z0-9\u00a1-\uffff]\." +
r")+" +
# TLD identifier name, may end with dot
r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
r")" +
# port number (optional)
r"(:(?P<port>\d{2,5}))?" +
# resource path (optional)
r"(?P<path>\/[^?# ]*)?" +
# query string (optional)
r"(\?(?P<query>[^#]*))?" +
# fragment (optional)
r"(#(?P<fragment>\S*))?" +
r"$"
).set_name("url")
# fmt: on
# pre-PEP8 compatibility names
convertToInteger = convert_to_integer
convertToFloat = convert_to_float
convertToDate = convert_to_date
convertToDatetime = convert_to_datetime
stripHTMLTags = strip_html_tags
upcaseTokens = upcase_tokens
downcaseTokens = downcase_tokens
_builtin_exprs = [
v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
]

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,642 @@
import railroad
import pyparsing
import typing
from typing import (
List,
NamedTuple,
Generic,
TypeVar,
Dict,
Callable,
Set,
Iterable,
)
from jinja2 import Template
from io import StringIO
import inspect
jinja2_template_source = """\
<!DOCTYPE html>
<html>
<head>
{% if not head %}
<style type="text/css">
.railroad-heading {
font-family: monospace;
}
</style>
{% else %}
{{ head | safe }}
{% endif %}
</head>
<body>
{{ body | safe }}
{% for diagram in diagrams %}
<div class="railroad-group">
<h1 class="railroad-heading">{{ diagram.title }}</h1>
<div class="railroad-description">{{ diagram.text }}</div>
<div class="railroad-svg">
{{ diagram.svg }}
</div>
</div>
{% endfor %}
</body>
</html>
"""
template = Template(jinja2_template_source)
# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
NamedDiagram = NamedTuple(
"NamedDiagram",
[("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)],
)
"""
A simple structure for associating a name with a railroad diagram
"""
T = TypeVar("T")
class EachItem(railroad.Group):
"""
Custom railroad item to compose a:
- Group containing a
- OneOrMore containing a
- Choice of the elements in the Each
with the group label indicating that all must be matched
"""
all_label = "[ALL]"
def __init__(self, *items):
choice_item = railroad.Choice(len(items) - 1, *items)
one_or_more_item = railroad.OneOrMore(item=choice_item)
super().__init__(one_or_more_item, label=self.all_label)
class AnnotatedItem(railroad.Group):
"""
Simple subclass of Group that creates an annotation label
"""
def __init__(self, label: str, item):
super().__init__(item=item, label="[{}]".format(label) if label else label)
class EditablePartial(Generic[T]):
"""
Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
constructed.
"""
# We need this here because the railroad constructors actually transform the data, so can't be called until the
# entire tree is assembled
def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
self.func = func
self.args = args
self.kwargs = kwargs
@classmethod
def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
"""
If you call this function in the same way that you would call the constructor, it will store the arguments
as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
"""
return EditablePartial(func=func, args=list(args), kwargs=kwargs)
@property
def name(self):
return self.kwargs["name"]
def __call__(self) -> T:
"""
Evaluate the partial and return the result
"""
args = self.args.copy()
kwargs = self.kwargs.copy()
# This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
# args=['list', 'of', 'things'])
arg_spec = inspect.getfullargspec(self.func)
if arg_spec.varargs in self.kwargs:
args += kwargs.pop(arg_spec.varargs)
return self.func(*args, **kwargs)
def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
"""
Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
:params kwargs: kwargs to be passed in to the template
"""
data = []
for diagram in diagrams:
if diagram.diagram is None:
continue
io = StringIO()
diagram.diagram.writeSvg(io.write)
title = diagram.name
if diagram.index == 0:
title += " (root)"
data.append({"title": title, "text": "", "svg": io.getvalue()})
return template.render(diagrams=data, **kwargs)
def resolve_partial(partial: "EditablePartial[T]") -> T:
"""
Recursively resolves a collection of Partials into whatever type they are
"""
if isinstance(partial, EditablePartial):
partial.args = resolve_partial(partial.args)
partial.kwargs = resolve_partial(partial.kwargs)
return partial()
elif isinstance(partial, list):
return [resolve_partial(x) for x in partial]
elif isinstance(partial, dict):
return {key: resolve_partial(x) for key, x in partial.items()}
else:
return partial
def to_railroad(
element: pyparsing.ParserElement,
diagram_kwargs: typing.Optional[dict] = None,
vertical: int = 3,
show_results_names: bool = False,
show_groups: bool = False,
) -> List[NamedDiagram]:
"""
Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
creation if you want to access the Railroad tree before it is converted to HTML
:param element: base element of the parser being diagrammed
:param diagram_kwargs: kwargs to pass to the Diagram() constructor
:param vertical: (optional) - int - limit at which number of alternatives should be
shown vertically instead of horizontally
:param show_results_names - bool to indicate whether results name annotations should be
included in the diagram
:param show_groups - bool to indicate whether groups should be highlighted with an unlabeled
surrounding box
"""
# Convert the whole tree underneath the root
lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
_to_diagram_element(
element,
lookup=lookup,
parent=None,
vertical=vertical,
show_results_names=show_results_names,
show_groups=show_groups,
)
root_id = id(element)
# Convert the root if it hasn't been already
if root_id in lookup:
if not element.customName:
lookup[root_id].name = ""
lookup[root_id].mark_for_extraction(root_id, lookup, force=True)
# Now that we're finished, we can convert from intermediate structures into Railroad elements
diags = list(lookup.diagrams.values())
if len(diags) > 1:
# collapse out duplicate diags with the same name
seen = set()
deduped_diags = []
for d in diags:
# don't extract SkipTo elements, they are uninformative as subdiagrams
if d.name == "...":
continue
if d.name is not None and d.name not in seen:
seen.add(d.name)
deduped_diags.append(d)
resolved = [resolve_partial(partial) for partial in deduped_diags]
else:
# special case - if just one diagram, always display it, even if
# it has no name
resolved = [resolve_partial(partial) for partial in diags]
return sorted(resolved, key=lambda diag: diag.index)
def _should_vertical(
specification: int, exprs: Iterable[pyparsing.ParserElement]
) -> bool:
"""
Returns true if we should return a vertical list of elements
"""
if specification is None:
return False
else:
return len(_visible_exprs(exprs)) >= specification
class ElementState:
"""
State recorded for an individual pyparsing Element
"""
# Note: this should be a dataclass, but we have to support Python 3.5
def __init__(
self,
element: pyparsing.ParserElement,
converted: EditablePartial,
parent: EditablePartial,
number: int,
name: str = None,
parent_index: typing.Optional[int] = None,
):
#: The pyparsing element that this represents
self.element: pyparsing.ParserElement = element
#: The name of the element
self.name: typing.Optional[str] = name
#: The output Railroad element in an unconverted state
self.converted: EditablePartial = converted
#: The parent Railroad element, which we store so that we can extract this if it's duplicated
self.parent: EditablePartial = parent
#: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
self.number: int = number
#: The index of this inside its parent
self.parent_index: typing.Optional[int] = parent_index
#: If true, we should extract this out into a subdiagram
self.extract: bool = False
#: If true, all of this element's children have been filled out
self.complete: bool = False
def mark_for_extraction(
self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
):
"""
Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
:param el_id: id of the element
:param state: element/diagram state tracker
:param name: name to use for this element's text
:param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
root element when we know we're finished
"""
self.extract = True
# Set the name
if not self.name:
if name:
# Allow forcing a custom name
self.name = name
elif self.element.customName:
self.name = self.element.customName
else:
self.name = ""
# Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children
# to be added
# Also, if this is just a string literal etc, don't bother extracting it
if force or (self.complete and _worth_extracting(self.element)):
state.extract_into_diagram(el_id)
class ConverterState:
"""
Stores some state that persists between recursions into the element tree
"""
def __init__(self, diagram_kwargs: typing.Optional[dict] = None):
#: A dictionary mapping ParserElements to state relating to them
self._element_diagram_states: Dict[int, ElementState] = {}
#: A dictionary mapping ParserElement IDs to subdiagrams generated from them
self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
#: The index of the next unnamed element
self.unnamed_index: int = 1
#: The index of the next element. This is used for sorting
self.index: int = 0
#: Shared kwargs that are used to customize the construction of diagrams
self.diagram_kwargs: dict = diagram_kwargs or {}
self.extracted_diagram_names: Set[str] = set()
def __setitem__(self, key: int, value: ElementState):
self._element_diagram_states[key] = value
def __getitem__(self, key: int) -> ElementState:
return self._element_diagram_states[key]
def __delitem__(self, key: int):
del self._element_diagram_states[key]
def __contains__(self, key: int):
return key in self._element_diagram_states
def generate_unnamed(self) -> int:
"""
Generate a number used in the name of an otherwise unnamed diagram
"""
self.unnamed_index += 1
return self.unnamed_index
def generate_index(self) -> int:
"""
Generate a number used to index a diagram
"""
self.index += 1
return self.index
def extract_into_diagram(self, el_id: int):
"""
Used when we encounter the same token twice in the same tree. When this
happens, we replace all instances of that token with a terminal, and
create a new subdiagram for the token
"""
position = self[el_id]
# Replace the original definition of this element with a regular block
if position.parent:
ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name)
if "item" in position.parent.kwargs:
position.parent.kwargs["item"] = ret
elif "items" in position.parent.kwargs:
position.parent.kwargs["items"][position.parent_index] = ret
# If the element we're extracting is a group, skip to its content but keep the title
if position.converted.func == railroad.Group:
content = position.converted.kwargs["item"]
else:
content = position.converted
self.diagrams[el_id] = EditablePartial.from_call(
NamedDiagram,
name=position.name,
diagram=EditablePartial.from_call(
railroad.Diagram, content, **self.diagram_kwargs
),
index=position.number,
)
del self[el_id]
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
"""
Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
themselves have children, then its complex enough to extract
"""
children = element.recurse()
return any(child.recurse() for child in children)
def _apply_diagram_item_enhancements(fn):
"""
decorator to ensure enhancements to a diagram item (such as results name annotations)
get applied on return from _to_diagram_element (we do this since there are several
returns in _to_diagram_element)
"""
def _inner(
element: pyparsing.ParserElement,
parent: typing.Optional[EditablePartial],
lookup: ConverterState = None,
vertical: int = None,
index: int = 0,
name_hint: str = None,
show_results_names: bool = False,
show_groups: bool = False,
) -> typing.Optional[EditablePartial]:
ret = fn(
element,
parent,
lookup,
vertical,
index,
name_hint,
show_results_names,
show_groups,
)
# apply annotation for results name, if present
if show_results_names and ret is not None:
element_results_name = element.resultsName
if element_results_name:
# add "*" to indicate if this is a "list all results" name
element_results_name += "" if element.modalResults else "*"
ret = EditablePartial.from_call(
railroad.Group, item=ret, label=element_results_name
)
return ret
return _inner
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
non_diagramming_exprs = (
pyparsing.ParseElementEnhance,
pyparsing.PositionToken,
pyparsing.And._ErrorStop,
)
return [
e
for e in exprs
if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs))
]
@_apply_diagram_item_enhancements
def _to_diagram_element(
element: pyparsing.ParserElement,
parent: typing.Optional[EditablePartial],
lookup: ConverterState = None,
vertical: int = None,
index: int = 0,
name_hint: str = None,
show_results_names: bool = False,
show_groups: bool = False,
) -> typing.Optional[EditablePartial]:
"""
Recursively converts a PyParsing Element to a railroad Element
:param lookup: The shared converter state that keeps track of useful things
:param index: The index of this element within the parent
:param parent: The parent of this element in the output tree
:param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
do so
:param name_hint: If provided, this will override the generated name
:param show_results_names: bool flag indicating whether to add annotations for results names
:returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
:param show_groups: bool flag indicating whether to show groups using bounding box
"""
exprs = element.recurse()
name = name_hint or element.customName or element.__class__.__name__
# Python's id() is used to provide a unique identifier for elements
el_id = id(element)
element_results_name = element.resultsName
# Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
if not element.customName:
if isinstance(
element,
(
# pyparsing.TokenConverter,
# pyparsing.Forward,
pyparsing.Located,
),
):
# However, if this element has a useful custom name, and its child does not, we can pass it on to the child
if exprs:
if not exprs[0].customName:
propagated_name = name
else:
propagated_name = None
return _to_diagram_element(
element.expr,
parent=parent,
lookup=lookup,
vertical=vertical,
index=index,
name_hint=propagated_name,
show_results_names=show_results_names,
show_groups=show_groups,
)
# If the element isn't worth extracting, we always treat it as the first time we say it
if _worth_extracting(element):
if el_id in lookup:
# If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
# so we have to extract it into a new diagram.
looked_up = lookup[el_id]
looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
return ret
elif el_id in lookup.diagrams:
# If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
# just put in a marker element that refers to the sub-diagram
ret = EditablePartial.from_call(
railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
)
return ret
# Recursively convert child elements
# Here we find the most relevant Railroad element for matching pyparsing Element
# We use ``items=[]`` here to hold the place for where the child elements will go once created
if isinstance(element, pyparsing.And):
# detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
# (all will have the same name, and resultsName)
if not exprs:
return None
if len(set((e.name, e.resultsName) for e in exprs)) == 1:
ret = EditablePartial.from_call(
railroad.OneOrMore, item="", repeat=str(len(exprs))
)
elif _should_vertical(vertical, exprs):
ret = EditablePartial.from_call(railroad.Stack, items=[])
else:
ret = EditablePartial.from_call(railroad.Sequence, items=[])
elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
if not exprs:
return None
if _should_vertical(vertical, exprs):
ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
else:
ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
elif isinstance(element, pyparsing.Each):
if not exprs:
return None
ret = EditablePartial.from_call(EachItem, items=[])
elif isinstance(element, pyparsing.NotAny):
ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
elif isinstance(element, pyparsing.FollowedBy):
ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
elif isinstance(element, pyparsing.PrecededBy):
ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
elif isinstance(element, pyparsing.Group):
if show_groups:
ret = EditablePartial.from_call(AnnotatedItem, label="", item="")
else:
ret = EditablePartial.from_call(railroad.Group, label="", item="")
elif isinstance(element, pyparsing.TokenConverter):
ret = EditablePartial.from_call(
AnnotatedItem, label=type(element).__name__.lower(), item=""
)
elif isinstance(element, pyparsing.Opt):
ret = EditablePartial.from_call(railroad.Optional, item="")
elif isinstance(element, pyparsing.OneOrMore):
ret = EditablePartial.from_call(railroad.OneOrMore, item="")
elif isinstance(element, pyparsing.ZeroOrMore):
ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
elif isinstance(element, pyparsing.Group):
ret = EditablePartial.from_call(
railroad.Group, item=None, label=element_results_name
)
elif isinstance(element, pyparsing.Empty) and not element.customName:
# Skip unnamed "Empty" elements
ret = None
elif len(exprs) > 1:
ret = EditablePartial.from_call(railroad.Sequence, items=[])
elif len(exprs) > 0 and not element_results_name:
ret = EditablePartial.from_call(railroad.Group, item="", label=name)
else:
terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
ret = terminal
if ret is None:
return
# Indicate this element's position in the tree so we can extract it if necessary
lookup[el_id] = ElementState(
element=element,
converted=ret,
parent=parent,
parent_index=index,
number=lookup.generate_index(),
)
if element.customName:
lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)
i = 0
for expr in exprs:
# Add a placeholder index in case we have to extract the child before we even add it to the parent
if "items" in ret.kwargs:
ret.kwargs["items"].insert(i, None)
item = _to_diagram_element(
expr,
parent=ret,
lookup=lookup,
vertical=vertical,
index=i,
show_results_names=show_results_names,
show_groups=show_groups,
)
# Some elements don't need to be shown in the diagram
if item is not None:
if "item" in ret.kwargs:
ret.kwargs["item"] = item
elif "items" in ret.kwargs:
# If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
ret.kwargs["items"][i] = item
i += 1
elif "items" in ret.kwargs:
# If we're supposed to skip this element, remove it from the parent
del ret.kwargs["items"][i]
# If all this items children are none, skip this item
if ret and (
("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
or ("item" in ret.kwargs and ret.kwargs["item"] is None)
):
ret = EditablePartial.from_call(railroad.Terminal, name)
# Mark this element as "complete", ie it has all of its children
if el_id in lookup:
lookup[el_id].complete = True
if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
lookup.extract_into_diagram(el_id)
if ret is not None:
ret = EditablePartial.from_call(
railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
)
return ret

View file

@ -0,0 +1,267 @@
# exceptions.py
import re
import sys
import typing
from .util import col, line, lineno, _collapse_string_to_ranges
from .unicode import pyparsing_unicode as ppu
class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic):
pass
_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums)
_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.")
class ParseBaseException(Exception):
"""base exception class for all parsing runtime exceptions"""
# Performance tuning: we construct a *lot* of these, so keep this
# constructor as small and fast as possible
def __init__(
self,
pstr: str,
loc: int = 0,
msg: typing.Optional[str] = None,
elem=None,
):
self.loc = loc
if msg is None:
self.msg = pstr
self.pstr = ""
else:
self.msg = msg
self.pstr = pstr
self.parser_element = self.parserElement = elem
self.args = (pstr, loc, msg)
@staticmethod
def explain_exception(exc, depth=16):
"""
Method to take an exception and translate the Python internal traceback into a list
of the pyparsing expressions that caused the exception to be raised.
Parameters:
- exc - exception raised during parsing (need not be a ParseException, in support
of Python exceptions that might be raised in a parse action)
- depth (default=16) - number of levels back in the stack trace to list expression
and function names; if None, the full stack trace names will be listed; if 0, only
the failing input line, marker, and exception string will be shown
Returns a multi-line string listing the ParserElements and/or function names in the
exception's stack trace.
"""
import inspect
from .core import ParserElement
if depth is None:
depth = sys.getrecursionlimit()
ret = []
if isinstance(exc, ParseBaseException):
ret.append(exc.line)
ret.append(" " * (exc.column - 1) + "^")
ret.append("{}: {}".format(type(exc).__name__, exc))
if depth > 0:
callers = inspect.getinnerframes(exc.__traceback__, context=depth)
seen = set()
for i, ff in enumerate(callers[-depth:]):
frm = ff[0]
f_self = frm.f_locals.get("self", None)
if isinstance(f_self, ParserElement):
if frm.f_code.co_name not in ("parseImpl", "_parseNoCache"):
continue
if id(f_self) in seen:
continue
seen.add(id(f_self))
self_type = type(f_self)
ret.append(
"{}.{} - {}".format(
self_type.__module__, self_type.__name__, f_self
)
)
elif f_self is not None:
self_type = type(f_self)
ret.append("{}.{}".format(self_type.__module__, self_type.__name__))
else:
code = frm.f_code
if code.co_name in ("wrapper", "<module>"):
continue
ret.append("{}".format(code.co_name))
depth -= 1
if not depth:
break
return "\n".join(ret)
@classmethod
def _from_exception(cls, pe):
"""
internal factory method to simplify creating one type of ParseException
from another - avoids having __init__ signature conflicts among subclasses
"""
return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
@property
def line(self) -> str:
"""
Return the line of text where the exception occurred.
"""
return line(self.loc, self.pstr)
@property
def lineno(self) -> int:
"""
Return the 1-based line number of text where the exception occurred.
"""
return lineno(self.loc, self.pstr)
@property
def col(self) -> int:
"""
Return the 1-based column on the line of text where the exception occurred.
"""
return col(self.loc, self.pstr)
@property
def column(self) -> int:
"""
Return the 1-based column on the line of text where the exception occurred.
"""
return col(self.loc, self.pstr)
def __str__(self) -> str:
if self.pstr:
if self.loc >= len(self.pstr):
foundstr = ", found end of text"
else:
# pull out next word at error location
found_match = _exception_word_extractor.match(self.pstr, self.loc)
if found_match is not None:
found = found_match.group(0)
else:
found = self.pstr[self.loc : self.loc + 1]
foundstr = (", found %r" % found).replace(r"\\", "\\")
else:
foundstr = ""
return "{}{} (at char {}), (line:{}, col:{})".format(
self.msg, foundstr, self.loc, self.lineno, self.column
)
def __repr__(self):
return str(self)
def mark_input_line(self, marker_string: str = None, *, markerString=">!<") -> str:
"""
Extracts the exception line from the input string, and marks
the location of the exception with a special symbol.
"""
markerString = marker_string if marker_string is not None else markerString
line_str = self.line
line_column = self.column - 1
if markerString:
line_str = "".join(
(line_str[:line_column], markerString, line_str[line_column:])
)
return line_str.strip()
def explain(self, depth=16) -> str:
"""
Method to translate the Python internal traceback into a list
of the pyparsing expressions that caused the exception to be raised.
Parameters:
- depth (default=16) - number of levels back in the stack trace to list expression
and function names; if None, the full stack trace names will be listed; if 0, only
the failing input line, marker, and exception string will be shown
Returns a multi-line string listing the ParserElements and/or function names in the
exception's stack trace.
Example::
expr = pp.Word(pp.nums) * 3
try:
expr.parse_string("123 456 A789")
except pp.ParseException as pe:
print(pe.explain(depth=0))
prints::
123 456 A789
^
ParseException: Expected W:(0-9), found 'A' (at char 8), (line:1, col:9)
Note: the diagnostic output will include string representations of the expressions
that failed to parse. These representations will be more helpful if you use `set_name` to
give identifiable names to your expressions. Otherwise they will use the default string
forms, which may be cryptic to read.
Note: pyparsing's default truncation of exception tracebacks may also truncate the
stack of expressions that are displayed in the ``explain`` output. To get the full listing
of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True``
"""
return self.explain_exception(self, depth)
markInputline = mark_input_line
class ParseException(ParseBaseException):
"""
Exception thrown when a parse expression doesn't match the input string
Example::
try:
Word(nums).set_name("integer").parse_string("ABC")
except ParseException as pe:
print(pe)
print("column: {}".format(pe.column))
prints::
Expected integer (at char 0), (line:1, col:1)
column: 1
"""
class ParseFatalException(ParseBaseException):
"""
User-throwable exception thrown when inconsistent parse content
is found; stops all parsing immediately
"""
class ParseSyntaxException(ParseFatalException):
"""
Just like :class:`ParseFatalException`, but thrown internally
when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
that parsing is to stop immediately because an unbacktrackable
syntax error has been found.
"""
class RecursiveGrammarException(Exception):
"""
Exception thrown by :class:`ParserElement.validate` if the
grammar could be left-recursive; parser may need to enable
left recursion using :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
"""
def __init__(self, parseElementList):
self.parseElementTrace = parseElementList
def __str__(self) -> str:
return "RecursiveGrammarException: {}".format(self.parseElementTrace)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,760 @@
# results.py
from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
import pprint
from weakref import ref as wkref
from typing import Tuple, Any
str_type: Tuple[type, ...] = (str, bytes)
_generator_type = type((_ for _ in ()))
class _ParseResultsWithOffset:
__slots__ = ["tup"]
def __init__(self, p1, p2):
self.tup = (p1, p2)
def __getitem__(self, i):
return self.tup[i]
def __getstate__(self):
return self.tup
def __setstate__(self, *args):
self.tup = args[0]
class ParseResults:
"""Structured parse results, to provide multiple means of access to
the parsed data:
- as a list (``len(results)``)
- by list index (``results[0], results[1]``, etc.)
- by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
Example::
integer = Word(nums)
date_str = (integer.set_results_name("year") + '/'
+ integer.set_results_name("month") + '/'
+ integer.set_results_name("day"))
# equivalent form:
# date_str = (integer("year") + '/'
# + integer("month") + '/'
# + integer("day"))
# parse_string returns a ParseResults object
result = date_str.parse_string("1999/12/31")
def test(s, fn=repr):
print("{} -> {}".format(s, fn(eval(s))))
test("list(result)")
test("result[0]")
test("result['month']")
test("result.day")
test("'month' in result")
test("'minutes' in result")
test("result.dump()", str)
prints::
list(result) -> ['1999', '/', '12', '/', '31']
result[0] -> '1999'
result['month'] -> '12'
result.day -> '31'
'month' in result -> True
'minutes' in result -> False
result.dump() -> ['1999', '/', '12', '/', '31']
- day: '31'
- month: '12'
- year: '1999'
"""
_null_values: Tuple[Any, ...] = (None, [], "", ())
__slots__ = [
"_name",
"_parent",
"_all_names",
"_modal",
"_toklist",
"_tokdict",
"__weakref__",
]
class List(list):
"""
Simple wrapper class to distinguish parsed list results that should be preserved
as actual Python lists, instead of being converted to :class:`ParseResults`:
LBRACK, RBRACK = map(pp.Suppress, "[]")
element = pp.Forward()
item = ppc.integer
element_list = LBRACK + pp.delimited_list(element) + RBRACK
# add parse actions to convert from ParseResults to actual Python collection types
def as_python_list(t):
return pp.ParseResults.List(t.as_list())
element_list.add_parse_action(as_python_list)
element <<= item | element_list
element.run_tests('''
100
[2,3,4]
[[2, 1],3,4]
[(2, 1),3,4]
(2,3,4)
''', post_parse=lambda s, r: (r[0], type(r[0])))
prints:
100
(100, <class 'int'>)
[2,3,4]
([2, 3, 4], <class 'list'>)
[[2, 1],3,4]
([[2, 1], 3, 4], <class 'list'>)
(Used internally by :class:`Group` when `aslist=True`.)
"""
def __new__(cls, contained=None):
if contained is None:
contained = []
if not isinstance(contained, list):
raise TypeError(
"{} may only be constructed with a list,"
" not {}".format(cls.__name__, type(contained).__name__)
)
return list.__new__(cls)
def __new__(cls, toklist=None, name=None, **kwargs):
if isinstance(toklist, ParseResults):
return toklist
self = object.__new__(cls)
self._name = None
self._parent = None
self._all_names = set()
if toklist is None:
self._toklist = []
elif isinstance(toklist, (list, _generator_type)):
self._toklist = (
[toklist[:]]
if isinstance(toklist, ParseResults.List)
else list(toklist)
)
else:
self._toklist = [toklist]
self._tokdict = dict()
return self
# Performance tuning: we construct a *lot* of these, so keep this
# constructor as small and fast as possible
def __init__(
self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
):
self._modal = modal
if name is not None and name != "":
if isinstance(name, int):
name = str(name)
if not modal:
self._all_names = {name}
self._name = name
if toklist not in self._null_values:
if isinstance(toklist, (str_type, type)):
toklist = [toklist]
if asList:
if isinstance(toklist, ParseResults):
self[name] = _ParseResultsWithOffset(
ParseResults(toklist._toklist), 0
)
else:
self[name] = _ParseResultsWithOffset(
ParseResults(toklist[0]), 0
)
self[name]._name = name
else:
try:
self[name] = toklist[0]
except (KeyError, TypeError, IndexError):
if toklist is not self:
self[name] = toklist
else:
self._name = name
def __getitem__(self, i):
if isinstance(i, (int, slice)):
return self._toklist[i]
else:
if i not in self._all_names:
return self._tokdict[i][-1][0]
else:
return ParseResults([v[0] for v in self._tokdict[i]])
def __setitem__(self, k, v, isinstance=isinstance):
if isinstance(v, _ParseResultsWithOffset):
self._tokdict[k] = self._tokdict.get(k, list()) + [v]
sub = v[0]
elif isinstance(k, (int, slice)):
self._toklist[k] = v
sub = v
else:
self._tokdict[k] = self._tokdict.get(k, list()) + [
_ParseResultsWithOffset(v, 0)
]
sub = v
if isinstance(sub, ParseResults):
sub._parent = wkref(self)
def __delitem__(self, i):
if isinstance(i, (int, slice)):
mylen = len(self._toklist)
del self._toklist[i]
# convert int to slice
if isinstance(i, int):
if i < 0:
i += mylen
i = slice(i, i + 1)
# get removed indices
removed = list(range(*i.indices(mylen)))
removed.reverse()
# fixup indices in token dictionary
for name, occurrences in self._tokdict.items():
for j in removed:
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(
value, position - (position > j)
)
else:
del self._tokdict[i]
def __contains__(self, k) -> bool:
return k in self._tokdict
def __len__(self) -> int:
return len(self._toklist)
def __bool__(self) -> bool:
return not not (self._toklist or self._tokdict)
def __iter__(self) -> Iterator:
return iter(self._toklist)
def __reversed__(self) -> Iterator:
return iter(self._toklist[::-1])
def keys(self):
return iter(self._tokdict)
def values(self):
return (self[k] for k in self.keys())
def items(self):
return ((k, self[k]) for k in self.keys())
def haskeys(self) -> bool:
"""
Since ``keys()`` returns an iterator, this method is helpful in bypassing
code that looks for the existence of any defined results names."""
return bool(self._tokdict)
def pop(self, *args, **kwargs):
"""
Removes and returns item at specified index (default= ``last``).
Supports both ``list`` and ``dict`` semantics for ``pop()``. If
passed no argument or an integer argument, it will use ``list``
semantics and pop tokens from the list of parsed tokens. If passed
a non-integer argument (most likely a string), it will use ``dict``
semantics and pop the corresponding value from any defined results
names. A second default return value argument is supported, just as in
``dict.pop()``.
Example::
numlist = Word(nums)[...]
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
def remove_first(tokens):
tokens.pop(0)
numlist.add_parse_action(remove_first)
print(numlist.parse_string("0 123 321")) # -> ['123', '321']
label = Word(alphas)
patt = label("LABEL") + Word(nums)[1, ...]
print(patt.parse_string("AAB 123 321").dump())
# Use pop() in a parse action to remove named result (note that corresponding value is not
# removed from list form of results)
def remove_LABEL(tokens):
tokens.pop("LABEL")
return tokens
patt.add_parse_action(remove_LABEL)
print(patt.parse_string("AAB 123 321").dump())
prints::
['AAB', '123', '321']
- LABEL: 'AAB'
['AAB', '123', '321']
"""
if not args:
args = [-1]
for k, v in kwargs.items():
if k == "default":
args = (args[0], v)
else:
raise TypeError(
"pop() got an unexpected keyword argument {!r}".format(k)
)
if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
index = args[0]
ret = self[index]
del self[index]
return ret
else:
defaultvalue = args[1]
return defaultvalue
def get(self, key, default_value=None):
"""
Returns named result matching the given key, or if there is no
such name, then returns the given ``default_value`` or ``None`` if no
``default_value`` is specified.
Similar to ``dict.get()``.
Example::
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parse_string("1999/12/31")
print(result.get("year")) # -> '1999'
print(result.get("hour", "not specified")) # -> 'not specified'
print(result.get("hour")) # -> None
"""
if key in self:
return self[key]
else:
return default_value
def insert(self, index, ins_string):
"""
Inserts new element at location index in the list of parsed tokens.
Similar to ``list.insert()``.
Example::
numlist = Word(nums)[...]
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
# use a parse action to insert the parse location in the front of the parsed results
def insert_locn(locn, tokens):
tokens.insert(0, locn)
numlist.add_parse_action(insert_locn)
print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
"""
self._toklist.insert(index, ins_string)
# fixup indices in token dictionary
for name, occurrences in self._tokdict.items():
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(
value, position + (position > index)
)
def append(self, item):
"""
Add single element to end of ``ParseResults`` list of elements.
Example::
numlist = Word(nums)[...]
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
# use a parse action to compute the sum of the parsed integers, and add it to the end
def append_sum(tokens):
tokens.append(sum(map(int, tokens)))
numlist.add_parse_action(append_sum)
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
"""
self._toklist.append(item)
def extend(self, itemseq):
"""
Add sequence of elements to end of ``ParseResults`` list of elements.
Example::
patt = Word(alphas)[1, ...]
# use a parse action to append the reverse of the matched strings, to make a palindrome
def make_palindrome(tokens):
tokens.extend(reversed([t[::-1] for t in tokens]))
return ''.join(tokens)
patt.add_parse_action(make_palindrome)
print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
"""
if isinstance(itemseq, ParseResults):
self.__iadd__(itemseq)
else:
self._toklist.extend(itemseq)
def clear(self):
"""
Clear all elements and results names.
"""
del self._toklist[:]
self._tokdict.clear()
def __getattr__(self, name):
try:
return self[name]
except KeyError:
if name.startswith("__"):
raise AttributeError(name)
return ""
def __add__(self, other) -> "ParseResults":
ret = self.copy()
ret += other
return ret
def __iadd__(self, other) -> "ParseResults":
if other._tokdict:
offset = len(self._toklist)
addoffset = lambda a: offset if a < 0 else a + offset
otheritems = other._tokdict.items()
otherdictitems = [
(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
for k, vlist in otheritems
for v in vlist
]
for k, v in otherdictitems:
self[k] = v
if isinstance(v[0], ParseResults):
v[0]._parent = wkref(self)
self._toklist += other._toklist
self._all_names |= other._all_names
return self
def __radd__(self, other) -> "ParseResults":
if isinstance(other, int) and other == 0:
# useful for merging many ParseResults using sum() builtin
return self.copy()
else:
# this may raise a TypeError - so be it
return other + self
def __repr__(self) -> str:
return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict())
def __str__(self) -> str:
return (
"["
+ ", ".join(
[
str(i) if isinstance(i, ParseResults) else repr(i)
for i in self._toklist
]
)
+ "]"
)
def _asStringList(self, sep=""):
out = []
for item in self._toklist:
if out and sep:
out.append(sep)
if isinstance(item, ParseResults):
out += item._asStringList()
else:
out.append(str(item))
return out
def as_list(self) -> list:
"""
Returns the parse results as a nested list of matching tokens, all converted to strings.
Example::
patt = Word(alphas)[1, ...]
result = patt.parse_string("sldkj lsdkj sldkj")
# even though the result prints in string-like form, it is actually a pyparsing ParseResults
print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
# Use as_list() to create an actual list
result_list = result.as_list()
print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
"""
return [
res.as_list() if isinstance(res, ParseResults) else res
for res in self._toklist
]
def as_dict(self) -> dict:
"""
Returns the named parse results as a nested dictionary.
Example::
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parse_string('12/31/1999')
print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
result_dict = result.as_dict()
print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
# even though a ParseResults supports dict-like access, sometime you just need to have a dict
import json
print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
"""
def to_item(obj):
if isinstance(obj, ParseResults):
return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
else:
return obj
return dict((k, to_item(v)) for k, v in self.items())
def copy(self) -> "ParseResults":
"""
Returns a new copy of a :class:`ParseResults` object.
"""
ret = ParseResults(self._toklist)
ret._tokdict = self._tokdict.copy()
ret._parent = self._parent
ret._all_names |= self._all_names
ret._name = self._name
return ret
def get_name(self):
r"""
Returns the results name for this token expression. Useful when several
different expressions might match at a particular location.
Example::
integer = Word(nums)
ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
house_number_expr = Suppress('#') + Word(nums, alphanums)
user_data = (Group(house_number_expr)("house_number")
| Group(ssn_expr)("ssn")
| Group(integer)("age"))
user_info = user_data[1, ...]
result = user_info.parse_string("22 111-22-3333 #221B")
for item in result:
print(item.get_name(), ':', item[0])
prints::
age : 22
ssn : 111-22-3333
house_number : 221B
"""
if self._name:
return self._name
elif self._parent:
par = self._parent()
def find_in_parent(sub):
return next(
(
k
for k, vlist in par._tokdict.items()
for v, loc in vlist
if sub is v
),
None,
)
return find_in_parent(self) if par else None
elif (
len(self) == 1
and len(self._tokdict) == 1
and next(iter(self._tokdict.values()))[0][1] in (0, -1)
):
return next(iter(self._tokdict.keys()))
else:
return None
def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
"""
Diagnostic method for listing out the contents of
a :class:`ParseResults`. Accepts an optional ``indent`` argument so
that this string can be embedded in a nested display of other data.
Example::
integer = Word(nums)
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
result = date_str.parse_string('1999/12/31')
print(result.dump())
prints::
['1999', '/', '12', '/', '31']
- day: '31'
- month: '12'
- year: '1999'
"""
out = []
NL = "\n"
out.append(indent + str(self.as_list()) if include_list else "")
if full:
if self.haskeys():
items = sorted((str(k), v) for k, v in self.items())
for k, v in items:
if out:
out.append(NL)
out.append("{}{}- {}: ".format(indent, (" " * _depth), k))
if isinstance(v, ParseResults):
if v:
out.append(
v.dump(
indent=indent,
full=full,
include_list=include_list,
_depth=_depth + 1,
)
)
else:
out.append(str(v))
else:
out.append(repr(v))
if any(isinstance(vv, ParseResults) for vv in self):
v = self
for i, vv in enumerate(v):
if isinstance(vv, ParseResults):
out.append(
"\n{}{}[{}]:\n{}{}{}".format(
indent,
(" " * (_depth)),
i,
indent,
(" " * (_depth + 1)),
vv.dump(
indent=indent,
full=full,
include_list=include_list,
_depth=_depth + 1,
),
)
)
else:
out.append(
"\n%s%s[%d]:\n%s%s%s"
% (
indent,
(" " * (_depth)),
i,
indent,
(" " * (_depth + 1)),
str(vv),
)
)
return "".join(out)
def pprint(self, *args, **kwargs):
"""
Pretty-printer for parsed results as a list, using the
`pprint <https://docs.python.org/3/library/pprint.html>`_ module.
Accepts additional positional or keyword args as defined for
`pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
Example::
ident = Word(alphas, alphanums)
num = Word(nums)
func = Forward()
term = ident | num | Group('(' + func + ')')
func <<= ident + Group(Optional(delimited_list(term)))
result = func.parse_string("fna a,b,(fnb c,d,200),100")
result.pprint(width=40)
prints::
['fna',
['a',
'b',
['(', 'fnb', ['c', 'd', '200'], ')'],
'100']]
"""
pprint.pprint(self.as_list(), *args, **kwargs)
# add support for pickle protocol
def __getstate__(self):
return (
self._toklist,
(
self._tokdict.copy(),
self._parent is not None and self._parent() or None,
self._all_names,
self._name,
),
)
def __setstate__(self, state):
self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
self._all_names = set(inAccumNames)
if par is not None:
self._parent = wkref(par)
else:
self._parent = None
def __getnewargs__(self):
return self._toklist, self._name
def __dir__(self):
return dir(type(self)) + list(self.keys())
@classmethod
def from_dict(cls, other, name=None) -> "ParseResults":
"""
Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
name-value relations as results names. If an optional ``name`` argument is
given, a nested ``ParseResults`` will be returned.
"""
def is_iterable(obj):
try:
iter(obj)
except Exception:
return False
else:
return not isinstance(obj, str_type)
ret = cls([])
for k, v in other.items():
if isinstance(v, Mapping):
ret += cls.from_dict(v, name=k)
else:
ret += cls([v], name=k, asList=is_iterable(v))
if name is not None:
ret = cls([ret], name=name)
return ret
asList = as_list
asDict = as_dict
getName = get_name
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)

View file

@ -0,0 +1,331 @@
# testing.py
from contextlib import contextmanager
import typing
from .core import (
ParserElement,
ParseException,
Keyword,
__diag__,
__compat__,
)
class pyparsing_test:
"""
namespace class for classes useful in writing unit tests
"""
class reset_pyparsing_context:
"""
Context manager to be used when writing unit tests that modify pyparsing config values:
- packrat parsing
- bounded recursion parsing
- default whitespace characters.
- default keyword characters
- literal string auto-conversion class
- __diag__ settings
Example::
with reset_pyparsing_context():
# test that literals used to construct a grammar are automatically suppressed
ParserElement.inlineLiteralsUsing(Suppress)
term = Word(alphas) | Word(nums)
group = Group('(' + term[...] + ')')
# assert that the '()' characters are not included in the parsed tokens
self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])
# after exiting context manager, literals are converted to Literal expressions again
"""
def __init__(self):
self._save_context = {}
def save(self):
self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
self._save_context[
"literal_string_class"
] = ParserElement._literalStringClass
self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace
self._save_context["packrat_enabled"] = ParserElement._packratEnabled
if ParserElement._packratEnabled:
self._save_context[
"packrat_cache_size"
] = ParserElement.packrat_cache.size
else:
self._save_context["packrat_cache_size"] = None
self._save_context["packrat_parse"] = ParserElement._parse
self._save_context[
"recursion_enabled"
] = ParserElement._left_recursion_enabled
self._save_context["__diag__"] = {
name: getattr(__diag__, name) for name in __diag__._all_names
}
self._save_context["__compat__"] = {
"collect_all_And_tokens": __compat__.collect_all_And_tokens
}
return self
def restore(self):
# reset pyparsing global state
if (
ParserElement.DEFAULT_WHITE_CHARS
!= self._save_context["default_whitespace"]
):
ParserElement.set_default_whitespace_chars(
self._save_context["default_whitespace"]
)
ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"]
Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
ParserElement.inlineLiteralsUsing(
self._save_context["literal_string_class"]
)
for name, value in self._save_context["__diag__"].items():
(__diag__.enable if value else __diag__.disable)(name)
ParserElement._packratEnabled = False
if self._save_context["packrat_enabled"]:
ParserElement.enable_packrat(self._save_context["packrat_cache_size"])
else:
ParserElement._parse = self._save_context["packrat_parse"]
ParserElement._left_recursion_enabled = self._save_context[
"recursion_enabled"
]
__compat__.collect_all_And_tokens = self._save_context["__compat__"]
return self
def copy(self):
ret = type(self)()
ret._save_context.update(self._save_context)
return ret
def __enter__(self):
return self.save()
def __exit__(self, *args):
self.restore()
class TestParseResultsAsserts:
"""
A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
"""
def assertParseResultsEquals(
self, result, expected_list=None, expected_dict=None, msg=None
):
"""
Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``,
and compare any defined results names with an optional ``expected_dict``.
"""
if expected_list is not None:
self.assertEqual(expected_list, result.as_list(), msg=msg)
if expected_dict is not None:
self.assertEqual(expected_dict, result.as_dict(), msg=msg)
def assertParseAndCheckList(
self, expr, test_string, expected_list, msg=None, verbose=True
):
"""
Convenience wrapper assert to test a parser element and input string, and assert that
the resulting ``ParseResults.asList()`` is equal to the ``expected_list``.
"""
result = expr.parse_string(test_string, parse_all=True)
if verbose:
print(result.dump())
else:
print(result.as_list())
self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)
def assertParseAndCheckDict(
self, expr, test_string, expected_dict, msg=None, verbose=True
):
"""
Convenience wrapper assert to test a parser element and input string, and assert that
the resulting ``ParseResults.asDict()`` is equal to the ``expected_dict``.
"""
result = expr.parse_string(test_string, parseAll=True)
if verbose:
print(result.dump())
else:
print(result.as_list())
self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)
def assertRunTestResults(
self, run_tests_report, expected_parse_results=None, msg=None
):
"""
Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of
list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped
with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``.
Finally, asserts that the overall ``runTests()`` success value is ``True``.
:param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
:param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
"""
run_test_success, run_test_results = run_tests_report
if expected_parse_results is not None:
merged = [
(*rpt, expected)
for rpt, expected in zip(run_test_results, expected_parse_results)
]
for test_string, result, expected in merged:
# expected should be a tuple containing a list and/or a dict or an exception,
# and optional failure message string
# an empty tuple will skip any result validation
fail_msg = next(
(exp for exp in expected if isinstance(exp, str)), None
)
expected_exception = next(
(
exp
for exp in expected
if isinstance(exp, type) and issubclass(exp, Exception)
),
None,
)
if expected_exception is not None:
with self.assertRaises(
expected_exception=expected_exception, msg=fail_msg or msg
):
if isinstance(result, Exception):
raise result
else:
expected_list = next(
(exp for exp in expected if isinstance(exp, list)), None
)
expected_dict = next(
(exp for exp in expected if isinstance(exp, dict)), None
)
if (expected_list, expected_dict) != (None, None):
self.assertParseResultsEquals(
result,
expected_list=expected_list,
expected_dict=expected_dict,
msg=fail_msg or msg,
)
else:
# warning here maybe?
print("no validation for {!r}".format(test_string))
# do this last, in case some specific test results can be reported instead
self.assertTrue(
run_test_success, msg=msg if msg is not None else "failed runTests"
)
@contextmanager
def assertRaisesParseException(self, exc_type=ParseException, msg=None):
with self.assertRaises(exc_type, msg=msg):
yield
@staticmethod
def with_line_numbers(
s: str,
start_line: typing.Optional[int] = None,
end_line: typing.Optional[int] = None,
expand_tabs: bool = True,
eol_mark: str = "|",
mark_spaces: typing.Optional[str] = None,
mark_control: typing.Optional[str] = None,
) -> str:
"""
Helpful method for debugging a parser - prints a string with line and column numbers.
(Line and column numbers are 1-based.)
:param s: tuple(bool, str - string to be printed with line and column numbers
:param start_line: int - (optional) starting line number in s to print (default=1)
:param end_line: int - (optional) ending line number in s to print (default=len(s))
:param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default
:param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|")
:param mark_spaces: str - (optional) special character to display in place of spaces
:param mark_control: str - (optional) convert non-printing control characters to a placeholding
character; valid values:
- "unicode" - replaces control chars with Unicode symbols, such as "" and ""
- any single character string - replace control characters with given string
- None (default) - string is displayed as-is
:return: str - input string with leading line numbers and column number headers
"""
if expand_tabs:
s = s.expandtabs()
if mark_control is not None:
if mark_control == "unicode":
tbl = str.maketrans(
{c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))}
| {127: 0x2421}
)
eol_mark = ""
else:
tbl = str.maketrans(
{c: mark_control for c in list(range(0, 32)) + [127]}
)
s = s.translate(tbl)
if mark_spaces is not None and mark_spaces != " ":
if mark_spaces == "unicode":
tbl = str.maketrans({9: 0x2409, 32: 0x2423})
s = s.translate(tbl)
else:
s = s.replace(" ", mark_spaces)
if start_line is None:
start_line = 1
if end_line is None:
end_line = len(s)
end_line = min(end_line, len(s))
start_line = min(max(1, start_line), end_line)
if mark_control != "unicode":
s_lines = s.splitlines()[start_line - 1 : end_line]
else:
s_lines = [line + "" for line in s.split("")[start_line - 1 : end_line]]
if not s_lines:
return ""
lineno_width = len(str(end_line))
max_line_len = max(len(line) for line in s_lines)
lead = " " * (lineno_width + 1)
if max_line_len >= 99:
header0 = (
lead
+ "".join(
"{}{}".format(" " * 99, (i + 1) % 100)
for i in range(max(max_line_len // 100, 1))
)
+ "\n"
)
else:
header0 = ""
header1 = (
header0
+ lead
+ "".join(
" {}".format((i + 1) % 10)
for i in range(-(-max_line_len // 10))
)
+ "\n"
)
header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
return (
header1
+ header2
+ "\n".join(
"{:{}d}:{}{}".format(i, lineno_width, line, eol_mark)
for i, line in enumerate(s_lines, start=start_line)
)
+ "\n"
)

View file

@ -0,0 +1,352 @@
# unicode.py
import sys
from itertools import filterfalse
from typing import List, Tuple, Union
class _lazyclassproperty:
def __init__(self, fn):
self.fn = fn
self.__doc__ = fn.__doc__
self.__name__ = fn.__name__
def __get__(self, obj, cls):
if cls is None:
cls = type(obj)
if not hasattr(cls, "_intern") or any(
cls._intern is getattr(superclass, "_intern", [])
for superclass in cls.__mro__[1:]
):
cls._intern = {}
attrname = self.fn.__name__
if attrname not in cls._intern:
cls._intern[attrname] = self.fn(cls)
return cls._intern[attrname]
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
class unicode_set:
"""
A set of Unicode characters, for language-specific strings for
``alphas``, ``nums``, ``alphanums``, and ``printables``.
A unicode_set is defined by a list of ranges in the Unicode character
set, in a class attribute ``_ranges``. Ranges can be specified using
2-tuples or a 1-tuple, such as::
_ranges = [
(0x0020, 0x007e),
(0x00a0, 0x00ff),
(0x0100,),
]
Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).
A unicode set can also be defined using multiple inheritance of other unicode sets::
class CJK(Chinese, Japanese, Korean):
pass
"""
_ranges: UnicodeRangeList = []
@_lazyclassproperty
def _chars_for_ranges(cls):
ret = []
for cc in cls.__mro__:
if cc is unicode_set:
break
for rr in getattr(cc, "_ranges", ()):
ret.extend(range(rr[0], rr[-1] + 1))
return [chr(c) for c in sorted(set(ret))]
@_lazyclassproperty
def printables(cls):
"all non-whitespace characters in this range"
return "".join(filterfalse(str.isspace, cls._chars_for_ranges))
@_lazyclassproperty
def alphas(cls):
"all alphabetic characters in this range"
return "".join(filter(str.isalpha, cls._chars_for_ranges))
@_lazyclassproperty
def nums(cls):
"all numeric digit characters in this range"
return "".join(filter(str.isdigit, cls._chars_for_ranges))
@_lazyclassproperty
def alphanums(cls):
"all alphanumeric characters in this range"
return cls.alphas + cls.nums
@_lazyclassproperty
def identchars(cls):
"all characters in this range that are valid identifier characters, plus underscore '_'"
return "".join(
sorted(
set(
"".join(filter(str.isidentifier, cls._chars_for_ranges))
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
+ "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
+ "_"
)
)
)
@_lazyclassproperty
def identbodychars(cls):
"""
all characters in this range that are valid identifier body characters,
plus the digits 0-9
"""
return "".join(
sorted(
set(
cls.identchars
+ "0123456789"
+ "".join(
[c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]
)
)
)
)
class pyparsing_unicode(unicode_set):
"""
A namespace class for defining common language unicode_sets.
"""
# fmt: off
# define ranges in language character sets
_ranges: UnicodeRangeList = [
(0x0020, sys.maxunicode),
]
class BasicMultilingualPlane(unicode_set):
"Unicode set for the Basic Multilingual Plane"
_ranges: UnicodeRangeList = [
(0x0020, 0xFFFF),
]
class Latin1(unicode_set):
"Unicode set for Latin-1 Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0020, 0x007E),
(0x00A0, 0x00FF),
]
class LatinA(unicode_set):
"Unicode set for Latin-A Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0100, 0x017F),
]
class LatinB(unicode_set):
"Unicode set for Latin-B Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0180, 0x024F),
]
class Greek(unicode_set):
"Unicode set for Greek Unicode Character Ranges"
_ranges: UnicodeRangeList = [
(0x0342, 0x0345),
(0x0370, 0x0377),
(0x037A, 0x037F),
(0x0384, 0x038A),
(0x038C,),
(0x038E, 0x03A1),
(0x03A3, 0x03E1),
(0x03F0, 0x03FF),
(0x1D26, 0x1D2A),
(0x1D5E,),
(0x1D60,),
(0x1D66, 0x1D6A),
(0x1F00, 0x1F15),
(0x1F18, 0x1F1D),
(0x1F20, 0x1F45),
(0x1F48, 0x1F4D),
(0x1F50, 0x1F57),
(0x1F59,),
(0x1F5B,),
(0x1F5D,),
(0x1F5F, 0x1F7D),
(0x1F80, 0x1FB4),
(0x1FB6, 0x1FC4),
(0x1FC6, 0x1FD3),
(0x1FD6, 0x1FDB),
(0x1FDD, 0x1FEF),
(0x1FF2, 0x1FF4),
(0x1FF6, 0x1FFE),
(0x2129,),
(0x2719, 0x271A),
(0xAB65,),
(0x10140, 0x1018D),
(0x101A0,),
(0x1D200, 0x1D245),
(0x1F7A1, 0x1F7A7),
]
class Cyrillic(unicode_set):
"Unicode set for Cyrillic Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0400, 0x052F),
(0x1C80, 0x1C88),
(0x1D2B,),
(0x1D78,),
(0x2DE0, 0x2DFF),
(0xA640, 0xA672),
(0xA674, 0xA69F),
(0xFE2E, 0xFE2F),
]
class Chinese(unicode_set):
"Unicode set for Chinese Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x2E80, 0x2E99),
(0x2E9B, 0x2EF3),
(0x31C0, 0x31E3),
(0x3400, 0x4DB5),
(0x4E00, 0x9FEF),
(0xA700, 0xA707),
(0xF900, 0xFA6D),
(0xFA70, 0xFAD9),
(0x16FE2, 0x16FE3),
(0x1F210, 0x1F212),
(0x1F214, 0x1F23B),
(0x1F240, 0x1F248),
(0x20000, 0x2A6D6),
(0x2A700, 0x2B734),
(0x2B740, 0x2B81D),
(0x2B820, 0x2CEA1),
(0x2CEB0, 0x2EBE0),
(0x2F800, 0x2FA1D),
]
class Japanese(unicode_set):
"Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
_ranges: UnicodeRangeList = []
class Kanji(unicode_set):
"Unicode set for Kanji Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x4E00, 0x9FBF),
(0x3000, 0x303F),
]
class Hiragana(unicode_set):
"Unicode set for Hiragana Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x3041, 0x3096),
(0x3099, 0x30A0),
(0x30FC,),
(0xFF70,),
(0x1B001,),
(0x1B150, 0x1B152),
(0x1F200,),
]
class Katakana(unicode_set):
"Unicode set for Katakana Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x3099, 0x309C),
(0x30A0, 0x30FF),
(0x31F0, 0x31FF),
(0x32D0, 0x32FE),
(0xFF65, 0xFF9F),
(0x1B000,),
(0x1B164, 0x1B167),
(0x1F201, 0x1F202),
(0x1F213,),
]
class Hangul(unicode_set):
"Unicode set for Hangul (Korean) Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x1100, 0x11FF),
(0x302E, 0x302F),
(0x3131, 0x318E),
(0x3200, 0x321C),
(0x3260, 0x327B),
(0x327E,),
(0xA960, 0xA97C),
(0xAC00, 0xD7A3),
(0xD7B0, 0xD7C6),
(0xD7CB, 0xD7FB),
(0xFFA0, 0xFFBE),
(0xFFC2, 0xFFC7),
(0xFFCA, 0xFFCF),
(0xFFD2, 0xFFD7),
(0xFFDA, 0xFFDC),
]
Korean = Hangul
class CJK(Chinese, Japanese, Hangul):
"Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
class Thai(unicode_set):
"Unicode set for Thai Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0E01, 0x0E3A),
(0x0E3F, 0x0E5B)
]
class Arabic(unicode_set):
"Unicode set for Arabic Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0600, 0x061B),
(0x061E, 0x06FF),
(0x0700, 0x077F),
]
class Hebrew(unicode_set):
"Unicode set for Hebrew Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0591, 0x05C7),
(0x05D0, 0x05EA),
(0x05EF, 0x05F4),
(0xFB1D, 0xFB36),
(0xFB38, 0xFB3C),
(0xFB3E,),
(0xFB40, 0xFB41),
(0xFB43, 0xFB44),
(0xFB46, 0xFB4F),
]
class Devanagari(unicode_set):
"Unicode set for Devanagari Unicode Character Range"
_ranges: UnicodeRangeList = [
(0x0900, 0x097F),
(0xA8E0, 0xA8FF)
]
# fmt: on
pyparsing_unicode.Japanese._ranges = (
pyparsing_unicode.Japanese.Kanji._ranges
+ pyparsing_unicode.Japanese.Hiragana._ranges
+ pyparsing_unicode.Japanese.Katakana._ranges
)
pyparsing_unicode.BMP = pyparsing_unicode.BasicMultilingualPlane
# add language identifiers using language Unicode
pyparsing_unicode.العربية = pyparsing_unicode.Arabic
pyparsing_unicode.中文 = pyparsing_unicode.Chinese
pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic
pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek
pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew
pyparsing_unicode.日本語 = pyparsing_unicode.Japanese
pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji
pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana
pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana
pyparsing_unicode.한국어 = pyparsing_unicode.Korean
pyparsing_unicode.ไทย = pyparsing_unicode.Thai
pyparsing_unicode.वनगर = pyparsing_unicode.Devanagari

View file

@ -0,0 +1,235 @@
# util.py
import warnings
import types
import collections
import itertools
from functools import lru_cache
from typing import List, Union, Iterable
_bslash = chr(92)
class __config_flags:
"""Internal class for defining compatibility and debugging flags"""
_all_names: List[str] = []
_fixed_names: List[str] = []
_type_desc = "configuration"
@classmethod
def _set(cls, dname, value):
if dname in cls._fixed_names:
warnings.warn(
"{}.{} {} is {} and cannot be overridden".format(
cls.__name__,
dname,
cls._type_desc,
str(getattr(cls, dname)).upper(),
)
)
return
if dname in cls._all_names:
setattr(cls, dname, value)
else:
raise ValueError("no such {} {!r}".format(cls._type_desc, dname))
enable = classmethod(lambda cls, name: cls._set(name, True))
disable = classmethod(lambda cls, name: cls._set(name, False))
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
"""
Returns current column within a string, counting newlines as line separators.
The first column is number 1.
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See
:class:`ParserElement.parseString` for more
information on parsing strings containing ``<TAB>`` s, and suggested
methods to maintain a consistent view of the parsed string, the parse
location, and line and column positions within the parsed string.
"""
s = strg
return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc)
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
"""Returns current line number within a string, counting newlines as line separators.
The first line is number 1.
Note - the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See :class:`ParserElement.parseString`
for more information on parsing strings containing ``<TAB>`` s, and
suggested methods to maintain a consistent view of the parsed string, the
parse location, and line and column positions within the parsed string.
"""
return strg.count("\n", 0, loc) + 1
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
"""
Returns the line of text containing loc within a string, counting newlines as line separators.
"""
last_cr = strg.rfind("\n", 0, loc)
next_cr = strg.find("\n", loc)
return strg[last_cr + 1 : next_cr] if next_cr >= 0 else strg[last_cr + 1 :]
class _UnboundedCache:
def __init__(self):
cache = {}
cache_get = cache.get
self.not_in_cache = not_in_cache = object()
def get(_, key):
return cache_get(key, not_in_cache)
def set_(_, key, value):
cache[key] = value
def clear(_):
cache.clear()
self.size = None
self.get = types.MethodType(get, self)
self.set = types.MethodType(set_, self)
self.clear = types.MethodType(clear, self)
class _FifoCache:
def __init__(self, size):
self.not_in_cache = not_in_cache = object()
cache = collections.OrderedDict()
cache_get = cache.get
def get(_, key):
return cache_get(key, not_in_cache)
def set_(_, key, value):
cache[key] = value
while len(cache) > size:
cache.popitem(last=False)
def clear(_):
cache.clear()
self.size = size
self.get = types.MethodType(get, self)
self.set = types.MethodType(set_, self)
self.clear = types.MethodType(clear, self)
class LRUMemo:
"""
A memoizing mapping that retains `capacity` deleted items
The memo tracks retained items by their access order; once `capacity` items
are retained, the least recently used item is discarded.
"""
def __init__(self, capacity):
self._capacity = capacity
self._active = {}
self._memory = collections.OrderedDict()
def __getitem__(self, key):
try:
return self._active[key]
except KeyError:
self._memory.move_to_end(key)
return self._memory[key]
def __setitem__(self, key, value):
self._memory.pop(key, None)
self._active[key] = value
def __delitem__(self, key):
try:
value = self._active.pop(key)
except KeyError:
pass
else:
while len(self._memory) >= self._capacity:
self._memory.popitem(last=False)
self._memory[key] = value
def clear(self):
self._active.clear()
self._memory.clear()
class UnboundedMemo(dict):
"""
A memoizing mapping that retains all deleted items
"""
def __delitem__(self, key):
pass
def _escape_regex_range_chars(s: str) -> str:
# escape these chars: ^-[]
for c in r"\^-[]":
s = s.replace(c, _bslash + c)
s = s.replace("\n", r"\n")
s = s.replace("\t", r"\t")
return str(s)
def _collapse_string_to_ranges(
s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
def is_consecutive(c):
c_int = ord(c)
is_consecutive.prev, prev = c_int, is_consecutive.prev
if c_int - prev > 1:
is_consecutive.value = next(is_consecutive.counter)
return is_consecutive.value
is_consecutive.prev = 0
is_consecutive.counter = itertools.count()
is_consecutive.value = -1
def escape_re_range_char(c):
return "\\" + c if c in r"\^-][" else c
def no_escape_re_range_char(c):
return c
if not re_escape:
escape_re_range_char = no_escape_re_range_char
ret = []
s = "".join(sorted(set(s)))
if len(s) > 3:
for _, chars in itertools.groupby(s, key=is_consecutive):
first = last = next(chars)
last = collections.deque(
itertools.chain(iter([last]), chars), maxlen=1
).pop()
if first == last:
ret.append(escape_re_range_char(first))
else:
sep = "" if ord(last) == ord(first) + 1 else "-"
ret.append(
"{}{}{}".format(
escape_re_range_char(first), sep, escape_re_range_char(last)
)
)
else:
ret = [escape_re_range_char(c) for c in s]
return "".join(ret)
def _flatten(ll: list) -> list:
ret = []
for i in ll:
if isinstance(i, list):
ret.extend(_flatten(i))
else:
ret.append(i)
return ret

View file

@ -0,0 +1,329 @@
import io
import posixpath
import zipfile
import itertools
import contextlib
import sys
import pathlib
if sys.version_info < (3, 7):
from collections import OrderedDict
else:
OrderedDict = dict
__all__ = ['Path']
def _parents(path):
"""
Given a path with elements separated by
posixpath.sep, generate all parents of that path.
>>> list(_parents('b/d'))
['b']
>>> list(_parents('/b/d/'))
['/b']
>>> list(_parents('b/d/f/'))
['b/d', 'b']
>>> list(_parents('b'))
[]
>>> list(_parents(''))
[]
"""
return itertools.islice(_ancestry(path), 1, None)
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)
_dedupe = OrderedDict.fromkeys
"""Deduplicate an iterable in original order"""
def _difference(minuend, subtrahend):
"""
Return items in minuend not in subtrahend, retaining order
with O(1) lookup.
"""
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
class CompleteDirs(zipfile.ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
"""
@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
as_dirs = (p + posixpath.sep for p in parents)
return _dedupe(_difference(as_dirs, names))
def namelist(self):
names = super(CompleteDirs, self).namelist()
return names + list(self._implied_dirs(names))
def _name_set(self):
return set(self.namelist())
def resolve_dir(self, name):
"""
If the name represents a directory, return that name
as a directory (with the trailing slash).
"""
names = self._name_set()
dirname = name + '/'
dir_match = name not in names and dirname in names
return dirname if dir_match else name
@classmethod
def make(cls, source):
"""
Given a source (filename or zipfile), return an
appropriate CompleteDirs subclass.
"""
if isinstance(source, CompleteDirs):
return source
if not isinstance(source, zipfile.ZipFile):
return cls(_pathlib_compat(source))
# Only allow for FastLookup when supplied zipfile is read-only
if 'r' not in source.mode:
cls = CompleteDirs
source.__class__ = cls
return source
class FastLookup(CompleteDirs):
"""
ZipFile subclass to ensure implicit
dirs exist and are resolved rapidly.
"""
def namelist(self):
with contextlib.suppress(AttributeError):
return self.__names
self.__names = super(FastLookup, self).namelist()
return self.__names
def _name_set(self):
with contextlib.suppress(AttributeError):
return self.__lookup
self.__lookup = super(FastLookup, self)._name_set()
return self.__lookup
def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)
class Path:
"""
A pathlib-compatible interface for zip files.
Consider a zip file with this structure::
.
a.txt
b
c.txt
d
e.txt
>>> data = io.BytesIO()
>>> zf = zipfile.ZipFile(data, 'w')
>>> zf.writestr('a.txt', 'content of a')
>>> zf.writestr('b/c.txt', 'content of c')
>>> zf.writestr('b/d/e.txt', 'content of e')
>>> zf.filename = 'mem/abcde.zip'
Path accepts the zipfile object itself or a filename
>>> root = Path(zf)
From there, several path operations are available.
Directory iteration (including the zip file itself):
>>> a, b = root.iterdir()
>>> a
Path('mem/abcde.zip', 'a.txt')
>>> b
Path('mem/abcde.zip', 'b/')
name property:
>>> b.name
'b'
join with divide operator:
>>> c = b / 'c.txt'
>>> c
Path('mem/abcde.zip', 'b/c.txt')
>>> c.name
'c.txt'
Read text:
>>> c.read_text()
'content of c'
existence:
>>> c.exists()
True
>>> (b / 'missing.txt').exists()
False
Coercion to string:
>>> import os
>>> str(c).replace(os.sep, posixpath.sep)
'mem/abcde.zip/b/c.txt'
At the root, ``name``, ``filename``, and ``parent``
resolve to the zipfile. Note these attributes are not
valid and will raise a ``ValueError`` if the zipfile
has no filename.
>>> root.name
'abcde.zip'
>>> str(root.filename).replace(os.sep, posixpath.sep)
'mem/abcde.zip'
>>> str(root.parent)
'mem'
"""
__repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
def __init__(self, root, at=""):
"""
Construct a Path from a ZipFile or filename.
Note: When the source is an existing ZipFile object,
its type (__class__) will be mutated to a
specialized type. If the caller wishes to retain the
original type, the caller should either create a
separate ZipFile object or pass a filename.
"""
self.root = FastLookup.make(root)
self.at = at
def open(self, mode='r', *args, pwd=None, **kwargs):
"""
Open this entry as text or binary following the semantics
of ``pathlib.Path.open()`` by passing arguments through
to io.TextIOWrapper().
"""
if self.is_dir():
raise IsADirectoryError(self)
zip_mode = mode[0]
if not self.exists() and zip_mode == 'r':
raise FileNotFoundError(self)
stream = self.root.open(self.at, zip_mode, pwd=pwd)
if 'b' in mode:
if args or kwargs:
raise ValueError("encoding args invalid for binary operation")
return stream
return io.TextIOWrapper(stream, *args, **kwargs)
@property
def name(self):
return pathlib.Path(self.at).name or self.filename.name
@property
def suffix(self):
return pathlib.Path(self.at).suffix or self.filename.suffix
@property
def suffixes(self):
return pathlib.Path(self.at).suffixes or self.filename.suffixes
@property
def stem(self):
return pathlib.Path(self.at).stem or self.filename.stem
@property
def filename(self):
return pathlib.Path(self.root.filename).joinpath(self.at)
def read_text(self, *args, **kwargs):
with self.open('r', *args, **kwargs) as strm:
return strm.read()
def read_bytes(self):
with self.open('rb') as strm:
return strm.read()
def _is_child(self, path):
return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
def _next(self, at):
return self.__class__(self.root, at)
def is_dir(self):
return not self.at or self.at.endswith("/")
def is_file(self):
return self.exists() and not self.is_dir()
def exists(self):
return self.at in self.root._name_set()
def iterdir(self):
if not self.is_dir():
raise ValueError("Can't listdir a file")
subs = map(self._next, self.root.namelist())
return filter(self._is_child, subs)
def __str__(self):
return posixpath.join(self.root.filename, self.at)
def __repr__(self):
return self.__repr.format(self=self)
def joinpath(self, *other):
next = posixpath.join(self.at, *map(_pathlib_compat, other))
return self._next(self.root.resolve_dir(next))
__truediv__ = joinpath
@property
def parent(self):
if not self.at:
return self.filename.parent
parent_at = posixpath.dirname(self.at.rstrip('/'))
if parent_at:
parent_at += '/'
return self._next(parent_at)