summaryrefslogtreecommitdiff
path: root/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/webencodings/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to '.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/webencodings/__init__.py')
-rw-r--r--.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/webencodings/__init__.py342
1 files changed, 342 insertions, 0 deletions
diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/webencodings/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/webencodings/__init__.py
new file mode 100644
index 00000000..d21d697c
--- /dev/null
+++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/webencodings/__init__.py
@@ -0,0 +1,342 @@
+# coding: utf-8
+"""
+
+ webencodings
+ ~~~~~~~~~~~~
+
+ This is a Python implementation of the `WHATWG Encoding standard
+ <http://encoding.spec.whatwg.org/>`_. See README for details.
+
+ :copyright: Copyright 2012 by Simon Sapin
+ :license: BSD, see LICENSE for details.
+
+"""
+
+from __future__ import unicode_literals
+
+import codecs
+
+from .labels import LABELS
+
+
+VERSION = '0.5.1'
+
+
+# Some names in Encoding are not valid Python aliases. Remap these.
+# Keys are encoding names as stored in the values of LABELS; values are the
+# names that lookup() hands to codecs.lookup() instead.
+PYTHON_NAMES = {
+ 'iso-8859-8-i': 'iso-8859-8',
+ 'x-mac-cyrillic': 'mac-cyrillic',
+ 'macintosh': 'mac-roman',
+ 'windows-874': 'cp874'}
+
+# Maps an encoding name to its Encoding object. Filled lazily by lookup().
+CACHE = {}
+
+
+def ascii_lower(string):
+ r"""Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.
+
+ :param string: A Unicode string.
+ :returns: A new Unicode string.
+
+ This is used for `ASCII case-insensitive
+ <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_
+ matching of encoding labels.
+ The same matching is also used, among other things,
+ for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.
+
+ This is different from the :meth:`~py:str.lower` method of Unicode strings
+ which also affects non-ASCII characters,
+ sometimes mapping them into the ASCII range:
+
+ >>> keyword = u'Bac\N{KELVIN SIGN}ground'
+ >>> assert keyword.lower() == u'background'
+ >>> assert ascii_lower(keyword) != keyword.lower()
+ >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'
+
+ """
+ # This turns out to be faster than unicode.translate()
+ # The lower-casing is done on the UTF-8 bytes: bytes.lower() only maps
+ # the ASCII letters A-Z, so the bytes of non-ASCII characters are left
+ # alone and decode back to the same characters.
+ return string.encode('utf8').lower().decode('utf8')
+
+
+def lookup(label):
+ """
+ Look for an encoding by its label.
+ This is the spec’s `get an encoding
+ <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
+ Supported labels are listed there.
+
+ The label is matched after stripping leading and trailing ASCII
+ whitespace and lower-casing ASCII letters.
+ Results are stored in ``CACHE`` keyed by encoding name, so later lookups
+ that resolve to the same name return the same object.
+
+ :param label: A string.
+ :returns:
+ An :class:`Encoding` object, or :obj:`None` for an unknown label.
+
+ """
+ # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
+ label = ascii_lower(label.strip('\t\n\f\r '))
+ name = LABELS.get(label)
+ if name is None:
+ return None
+ encoding = CACHE.get(name)
+ if encoding is None:
+ # Not cached yet: build the Encoding once and remember it below.
+ if name == 'x-user-defined':
+ # This codec is taken from the package's own x_user_defined module
+ # rather than from codecs.lookup().
+ from .x_user_defined import codec_info
+ else:
+ python_name = PYTHON_NAMES.get(name, name)
+ # Any python_name value that gets to here should be valid.
+ codec_info = codecs.lookup(python_name)
+ encoding = Encoding(name, codec_info)
+ CACHE[name] = encoding
+ return encoding
+
+
+def _get_encoding(encoding_or_label):
+ """
+ Accept either an encoding object or label.
+
+ :param encoding_or_label:
+ An :class:`Encoding` object or a label string.
+ :returns: An :class:`Encoding` object.
+ :raises: :exc:`~exceptions.LookupError` for an unknown label.
+
+ """
+ # Anything that has a ``codec_info`` attribute is treated as an Encoding
+ # and returned unchanged; everything else is handled as a label.
+ if hasattr(encoding_or_label, 'codec_info'):
+ return encoding_or_label
+
+ encoding = lookup(encoding_or_label)
+ if encoding is None:
+ # lookup() signals an unknown label with None; turn that into an error.
+ raise LookupError('Unknown encoding label: %r' % encoding_or_label)
+ return encoding
+
+
+class Encoding(object):
+ """Represents a character encoding such as UTF-8,
+ that can be used for decoding or encoding.
+
+ .. attribute:: name
+
+ Canonical name of the encoding
+
+ .. attribute:: codec_info
+
+ The actual implementation of the encoding,
+ a stdlib :class:`~codecs.CodecInfo` object.
+ See :func:`codecs.register`.
+
+ """
+ def __init__(self, name, codec_info):
+ # Both values are stored as given; no validation is done here.
+ self.name = name
+ self.codec_info = codec_info
+
+ def __repr__(self):
+ # Rendered as ``<Encoding NAME>``.
+ return '<Encoding %s>' % self.name
+
+
+#: The UTF-8 encoding. Should be used for new content and formats.
+UTF8 = lookup('utf-8')
+
+# The UTF-16 encodings that _detect_bom() returns when the input starts
+# with the corresponding byte order mark.
+_UTF16LE = lookup('utf-16le')
+_UTF16BE = lookup('utf-16be')
+
+
+def decode(input, fallback_encoding, errors='replace'):
+ """
+ Decode a single string.
+
+ An encoding indicated by a BOM at the start of :obj:`input` takes
+ precedence over :obj:`fallback_encoding`; the BOM itself is removed
+ before decoding.
+
+ :param input: A byte string
+ :param fallback_encoding:
+ An :class:`Encoding` object or a label string.
+ The encoding to use if :obj:`input` does not have a BOM.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :return:
+ A ``(output, encoding)`` tuple of a Unicode string
+ and an :obj:`Encoding`.
+
+ """
+ # Fail early if `fallback_encoding` is an invalid label.
+ fallback_encoding = _get_encoding(fallback_encoding)
+ bom_encoding, input = _detect_bom(input)
+ encoding = bom_encoding or fallback_encoding
+ # The codec's decode() result is indexed with [0] to keep only the text.
+ return encoding.codec_info.decode(input, errors)[0], encoding
+
+
+def _detect_bom(input):
+ """Return (bom_encoding, input), with any BOM removed from the input.
+
+ :param input: A byte string.
+ :returns:
+ A ``(bom_encoding, input)`` tuple. :obj:`bom_encoding` is the
+ :class:`Encoding` matching the BOM found at the start of the input,
+ or :obj:`None` when the input starts with none of the three
+ recognized BOMs, in which case the input is returned unchanged.
+
+ """
+ # The two-byte UTF-16 marks are tested before the three-byte UTF-8 mark.
+ if input.startswith(b'\xFF\xFE'):
+ return _UTF16LE, input[2:]
+ if input.startswith(b'\xFE\xFF'):
+ return _UTF16BE, input[2:]
+ if input.startswith(b'\xEF\xBB\xBF'):
+ return UTF8, input[3:]
+ return None, input
+
+
+def encode(input, encoding=UTF8, errors='strict'):
+ """
+ Encode a single string.
+
+ :param input: A Unicode string.
+ :param encoding: An :class:`Encoding` object or a label string.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :return: A byte string.
+
+ """
+ # The codec's encode() result is indexed with [0] to keep only the bytes.
+ return _get_encoding(encoding).codec_info.encode(input, errors)[0]
+
+
+def iter_decode(input, fallback_encoding, errors='replace'):
+ """
+ "Pull"-based decoder.
+
+ :param input:
+ An iterable of byte strings.
+
+ The input is first consumed just enough to determine the encoding
+ based on the presence of a BOM,
+ then consumed on demand when the return value is.
+ :param fallback_encoding:
+ An :class:`Encoding` object or a label string.
+ The encoding to use if :obj:`input` does not have a BOM.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :returns:
+ An ``(output, encoding)`` tuple.
+ :obj:`output` is an iterable of Unicode strings,
+ :obj:`encoding` is the :obj:`Encoding` that is being used.
+
+ """
+
+ decoder = IncrementalDecoder(fallback_encoding, errors)
+ generator = _iter_decode_generator(input, decoder)
+ # The generator's first item is the Encoding, not text. Pull it here so
+ # that the generator handed back to the caller only yields text.
+ encoding = next(generator)
+ return generator, encoding
+
+
+def _iter_decode_generator(input, decoder):
+ """Return a generator that first yields the :obj:`Encoding`,
+ then yields output chunks as Unicode strings.
+
+ :param input: An iterable of byte strings.
+ :param decoder:
+ An object with a ``decode(input, final=False)`` method and an
+ ``encoding`` attribute, as provided by :class:`IncrementalDecoder`.
+
+ """
+ decode = decoder.decode
+ # Use one shared iterator so the second loop below resumes where the
+ # first one stopped.
+ input = iter(input)
+ # Phase 1: feed chunks until the decoder produces some output; by then
+ # its encoding is expected to be set (asserted below).
+ for chunck in input:
+ output = decode(chunck)
+ if output:
+ assert decoder.encoding is not None
+ yield decoder.encoding
+ yield output
+ break
+ else:
+ # Input exhausted without determining the encoding
+ # Flush with final=True, then report the encoding and any remaining text.
+ output = decode(b'', final=True)
+ assert decoder.encoding is not None
+ yield decoder.encoding
+ if output:
+ yield output
+ return
+
+ # Phase 2: decode the remaining chunks, skipping empty results.
+ for chunck in input:
+ output = decode(chunck)
+ if output:
+ yield output
+ # Tell the decoder that no more input is coming and emit what is left.
+ output = decode(b'', final=True)
+ if output:
+ yield output
+
+
+def iter_encode(input, encoding=UTF8, errors='strict'):
+ """
+ “Pull”-based encoder.
+
+ :param input: An iterable of Unicode strings.
+ :param encoding: An :class:`Encoding` object or a label string.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+ :returns: An iterable of byte strings.
+
+ """
+ # Fail early if `encoding` is an invalid label.
+ # The encoder is built here, outside the generator, so an unknown label
+ # raises on this call instead of when the result is first iterated.
+ encode = IncrementalEncoder(encoding, errors).encode
+ return _iter_encode_generator(input, encode)
+
+
+def _iter_encode_generator(input, encode):
+ """Yield the non-empty results of encoding each chunk of the input.
+
+ :param input: An iterable of Unicode strings.
+ :param encode:
+ A callable taking ``(input, final=False)``, such as the ``encode``
+ attribute of an :class:`IncrementalEncoder`.
+
+ """
+ for chunck in input:
+ output = encode(chunck)
+ if output:
+ yield output
+ # Tell the encoder that no more input is coming and emit what is left.
+ output = encode('', final=True)
+ if output:
+ yield output
+
+
+class IncrementalDecoder(object):
+ """
+ “Push”-based decoder.
+
+ :param fallback_encoding:
+ An :class:`Encoding` object or a label string.
+ The encoding to use if :obj:`input` does not have a BOM.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+
+ """
+ def __init__(self, fallback_encoding, errors='replace'):
+ # Fail early if `fallback_encoding` is an invalid label.
+ self._fallback_encoding = _get_encoding(fallback_encoding)
+ self._errors = errors
+ # Bytes held back while there is not enough input to check for a BOM.
+ self._buffer = b''
+ # Bound decode method of the codec's incremental decoder; set once the
+ # encoding has been determined.
+ self._decoder = None
+ #: The actual :class:`Encoding` that is being used,
+ #: or :obj:`None` if that is not determined yet.
+ #: (Ie. if there is not enough input yet to determine
+ #: if there is a BOM.)
+ self.encoding = None # Not known yet.
+
+ def decode(self, input, final=False):
+ """Decode one chunk of the input.
+
+ :param input: A byte string.
+ :param final:
+ Indicate that no more input is available.
+ Must be :obj:`True` if this is the last call.
+ :returns: A Unicode string.
+
+ """
+ decoder = self._decoder
+ if decoder is not None:
+ # Encoding already determined: pass straight through to the codec.
+ return decoder(input, final)
+
+ # Still undecided: prepend whatever earlier calls held back.
+ input = self._buffer + input
+ encoding, input = _detect_bom(input)
+ if encoding is None:
+ # 3 is the length of the longest BOM that _detect_bom() checks for.
+ if len(input) < 3 and not final: # Not enough data yet.
+ self._buffer = input
+ return ''
+ else: # No BOM
+ encoding = self._fallback_encoding
+ # From here on the encoding is fixed for the rest of the stream.
+ decoder = encoding.codec_info.incrementaldecoder(self._errors).decode
+ self._decoder = decoder
+ self.encoding = encoding
+ return decoder(input, final)
+
+
+class IncrementalEncoder(object):
+ """
+ “Push”-based encoder.
+
+ :param encoding: An :class:`Encoding` object or a label string.
+ :param errors: Type of error handling. See :func:`codecs.register`.
+ :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
+
+ .. method:: encode(input, final=False)
+
+ :param input: A Unicode string.
+ :param final:
+ Indicate that no more input is available.
+ Must be :obj:`True` if this is the last call.
+ :returns: A byte string.
+
+ """
+ def __init__(self, encoding=UTF8, errors='strict'):
+ encoding = _get_encoding(encoding)
+ # ``encode`` is set per instance to the bound encode method of the
+ # codec's incremental encoder; this class defines no method of its own.
+ self.encode = encoding.codec_info.incrementalencoder(errors).encode