diff options
Diffstat (limited to '.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib')
68 files changed, 13196 insertions, 0 deletions
diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__init__.py new file mode 100644 index 00000000..04912349 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__init__.py @@ -0,0 +1,35 @@ +""" +HTML parsing library based on the `WHATWG HTML specification +<https://whatwg.org/html>`_. The parser is designed to be compatible with +existing HTML found in the wild and implements well-defined error recovery that +is largely compatible with modern desktop web browsers. + +Example usage:: + + from pip._vendor import html5lib + with open("my_document.html", "rb") as f: + tree = html5lib.parse(f) + +For convenience, this module re-exports the following names: + +* :func:`~.html5parser.parse` +* :func:`~.html5parser.parseFragment` +* :class:`~.html5parser.HTMLParser` +* :func:`~.treebuilders.getTreeBuilder` +* :func:`~.treewalkers.getTreeWalker` +* :func:`~.serializer.serialize` +""" + +from __future__ import absolute_import, division, unicode_literals + +from .html5parser import HTMLParser, parse, parseFragment +from .treebuilders import getTreeBuilder +from .treewalkers import getTreeWalker +from .serializer import serialize + +__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", + "getTreeWalker", "serialize"] + +# this has to be at the top level, see how setup.py parses this +#: Distribution version number. +__version__ = "1.0.1" diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..8a1c72cd --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..86e4cbe4 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_ihatexml.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..96a0040d --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..c16c1bf4 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..3a84199f --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/_utils.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..222766fc --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/constants.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..84b10cb0 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/html5parser.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..8051998b --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/__pycache__/serializer.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_ihatexml.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_ihatexml.py new file mode 100644 index 00000000..4c77717b --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_ihatexml.py @@ -0,0 +1,288 @@ +from __future__ import absolute_import, division, unicode_literals + +import re +import warnings + +from .constants import DataLossWarning + +baseChar = """ +[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | +[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | +[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | +[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | +[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | +[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | +[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | +[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | +[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | +[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | +[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | +[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | +[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | +[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | +[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | +[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | +[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | +[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | +[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | +[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | +[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | +[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | +[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | +[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | +[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | +[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | +[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | +[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | +[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | +[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | +#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | +#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | +#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | +[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | +[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | +#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | +[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | +[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | +[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | +[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | +[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | +#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | +[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | +[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | +[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | +[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" + +ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" + +combiningCharacter = """ +[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | +[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | +[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | +[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | +#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | +[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | +[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | +#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | +[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | +[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | +#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | +[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | +[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | +[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | +[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | +[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | +#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | +[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | +#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | +[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | +[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | +#x3099 | #x309A""" + +digit = """ +[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | +[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | +[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | +[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" + +extender = """ +#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | +#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" + +letter = " | ".join([baseChar, ideographic]) + +# Without the +name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, + extender]) +nameFirst = " | ".join([letter, "_"]) + +reChar = re.compile(r"#x([\d|A-F]{4,4})") +reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") + + +def charStringToList(chars): + charRanges = [item.strip() for item in chars.split(" | ")] + rv = [] + for item in charRanges: + foundMatch = False + for regexp in (reChar, reCharRange): + match = regexp.match(item) + if match is not None: + rv.append([hexToInt(item) for item in match.groups()]) + if len(rv[-1]) == 1: + rv[-1] = rv[-1] * 2 + foundMatch = True + break + if not foundMatch: + assert len(item) == 1 + + rv.append([ord(item)] * 2) + rv = normaliseCharList(rv) + return rv + + +def normaliseCharList(charList): + charList = sorted(charList) + for item in charList: + assert item[1] >= item[0] + rv = [] + i = 0 + while i < len(charList): + j = 1 + rv.append(charList[i]) + while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: + rv[-1][1] = charList[i + j][1] + j += 1 + i += j + return rv + +# We don't really support characters above the BMP :( +max_unicode = int("FFFF", 16) + + +def missingRanges(charList): + rv = [] + if charList[0] != 0: + rv.append([0, charList[0][0] - 1]) + for i, item in enumerate(charList[:-1]): + rv.append([item[1] + 1, charList[i + 1][0] - 1]) + if charList[-1][1] != max_unicode: + rv.append([charList[-1][1] + 1, max_unicode]) + return rv + + +def listToRegexpStr(charList): + rv = [] + for item in charList: + if item[0] == item[1]: + rv.append(escapeRegexp(chr(item[0]))) + else: + rv.append(escapeRegexp(chr(item[0])) + "-" + + escapeRegexp(chr(item[1]))) + return "[%s]" % "".join(rv) + + +def hexToInt(hex_str): + return int(hex_str, 16) + + +def escapeRegexp(string): + specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", + "[", "]", "|", "(", ")", "-") + for char in specialCharacters: + string = string.replace(char, "\\" + char) + + return string + +# output from the above +nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa + +nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa + +# Simpler things +nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") + + +class InfosetFilter(object): + replacementRegexp = re.compile(r"U[\dA-F]{5,5}") + + def __init__(self, + dropXmlnsLocalName=False, + dropXmlnsAttrNs=False, + preventDoubleDashComments=False, + preventDashAtCommentEnd=False, + replaceFormFeedCharacters=True, + preventSingleQuotePubid=False): + + self.dropXmlnsLocalName = dropXmlnsLocalName + self.dropXmlnsAttrNs = dropXmlnsAttrNs + + self.preventDoubleDashComments = preventDoubleDashComments + self.preventDashAtCommentEnd = preventDashAtCommentEnd + + self.replaceFormFeedCharacters = replaceFormFeedCharacters + + self.preventSingleQuotePubid = preventSingleQuotePubid + + self.replaceCache = {} + + def coerceAttribute(self, name, namespace=None): + if self.dropXmlnsLocalName and name.startswith("xmlns:"): + warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) + return None + elif (self.dropXmlnsAttrNs and + namespace == "http://www.w3.org/2000/xmlns/"): + warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) + return None + else: + return self.toXmlName(name) + + def coerceElement(self, name): + return self.toXmlName(name) + + def coerceComment(self, data): + if self.preventDoubleDashComments: + while "--" in data: + warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) + data = data.replace("--", "- -") + if data.endswith("-"): + warnings.warn("Comments cannot end in a dash", DataLossWarning) + data += " " + return data + + def coerceCharacters(self, data): + if self.replaceFormFeedCharacters: + for _ in range(data.count("\x0C")): + warnings.warn("Text cannot contain U+000C", DataLossWarning) + data = data.replace("\x0C", " ") + # Other non-xml characters + return data + + def coercePubid(self, data): + dataOutput = data + for char in nonPubidCharRegexp.findall(data): + warnings.warn("Coercing non-XML pubid", DataLossWarning) + replacement = self.getReplacementCharacter(char) + dataOutput = dataOutput.replace(char, replacement) + if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: + warnings.warn("Pubid cannot contain single quote", DataLossWarning) + dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) + return dataOutput + + def toXmlName(self, name): + nameFirst = name[0] + nameRest = name[1:] + m = nonXmlNameFirstBMPRegexp.match(nameFirst) + if m: + warnings.warn("Coercing non-XML name", DataLossWarning) + nameFirstOutput = self.getReplacementCharacter(nameFirst) + else: + nameFirstOutput = nameFirst + + nameRestOutput = nameRest + replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) + for char in replaceChars: + warnings.warn("Coercing non-XML name", DataLossWarning) + replacement = self.getReplacementCharacter(char) + nameRestOutput = nameRestOutput.replace(char, replacement) + return nameFirstOutput + nameRestOutput + + def getReplacementCharacter(self, char): + if char in self.replaceCache: + replacement = self.replaceCache[char] + else: + replacement = self.escapeChar(char) + return replacement + + def fromXmlName(self, name): + for item in set(self.replacementRegexp.findall(name)): + name = name.replace(item, self.unescapeChar(item)) + return name + + def escapeChar(self, char): + replacement = "U%05X" % ord(char) + self.replaceCache[char] = replacement + return replacement + + def unescapeChar(self, charcode): + return chr(int(charcode[1:], 16)) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_inputstream.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_inputstream.py new file mode 100644 index 00000000..a65e55f6 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_inputstream.py @@ -0,0 +1,923 @@ +from __future__ import absolute_import, division, unicode_literals + +from pip._vendor.six import text_type, binary_type +from pip._vendor.six.moves import http_client, urllib + +import codecs +import re + +from pip._vendor import webencodings + +from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase +from .constants import _ReparseException +from . import _utils + +from io import StringIO + +try: + from io import BytesIO +except ImportError: + BytesIO = StringIO + +# Non-unicode versions of constants for use in the pre-parser +spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) +asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) +asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) +spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) + + +invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa + +if _utils.supports_lone_surrogates: + # Use one extra step of indirection and create surrogates with + # eval. Not using this indirection would introduce an illegal + # unicode literal on platforms not supporting such lone + # surrogates. + assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + + eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used + "]") +else: + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) + +non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, + 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, + 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, + 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, + 0x10FFFE, 0x10FFFF]) + +ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") + +# Cache for charsUntil() +charsUntilRegEx = {} + + +class BufferedStream(object): + """Buffering for streams that do not have buffering of their own + + The buffer is implemented as a list of chunks on the assumption that + joining many strings will be slow since it is O(n**2) + """ + + def __init__(self, stream): + self.stream = stream + self.buffer = [] + self.position = [-1, 0] # chunk number, offset + + def tell(self): + pos = 0 + for chunk in self.buffer[:self.position[0]]: + pos += len(chunk) + pos += self.position[1] + return pos + + def seek(self, pos): + assert pos <= self._bufferedBytes() + offset = pos + i = 0 + while len(self.buffer[i]) < offset: + offset -= len(self.buffer[i]) + i += 1 + self.position = [i, offset] + + def read(self, bytes): + if not self.buffer: + return self._readStream(bytes) + elif (self.position[0] == len(self.buffer) and + self.position[1] == len(self.buffer[-1])): + return self._readStream(bytes) + else: + return self._readFromBuffer(bytes) + + def _bufferedBytes(self): + return sum([len(item) for item in self.buffer]) + + def _readStream(self, bytes): + data = self.stream.read(bytes) + self.buffer.append(data) + self.position[0] += 1 + self.position[1] = len(data) + return data + + def _readFromBuffer(self, bytes): + remainingBytes = bytes + rv = [] + bufferIndex = self.position[0] + bufferOffset = self.position[1] + while bufferIndex < len(self.buffer) and remainingBytes != 0: + assert remainingBytes > 0 + bufferedData = self.buffer[bufferIndex] + + if remainingBytes <= len(bufferedData) - bufferOffset: + bytesToRead = remainingBytes + self.position = [bufferIndex, bufferOffset + bytesToRead] + else: + bytesToRead = len(bufferedData) - bufferOffset + self.position = [bufferIndex, len(bufferedData)] + bufferIndex += 1 + rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) + remainingBytes -= bytesToRead + + bufferOffset = 0 + + if remainingBytes: + rv.append(self._readStream(remainingBytes)) + + return b"".join(rv) + + +def HTMLInputStream(source, **kwargs): + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + if (isinstance(source, http_client.HTTPResponse) or + # Also check for addinfourl wrapping HTTPResponse + (isinstance(source, urllib.response.addbase) and + isinstance(source.fp, http_client.HTTPResponse))): + isUnicode = False + elif hasattr(source, "read"): + isUnicode = isinstance(source.read(0), text_type) + else: + isUnicode = isinstance(source, text_type) + + if isUnicode: + encodings = [x for x in kwargs if x.endswith("_encoding")] + if encodings: + raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) + + return HTMLUnicodeInputStream(source, **kwargs) + else: + return HTMLBinaryInputStream(source, **kwargs) + + +class HTMLUnicodeInputStream(object): + """Provides a unicode stream of characters to the HTMLTokenizer. + + This class takes care of character encoding and removing or replacing + incorrect byte-sequences and also provides column and line tracking. + + """ + + _defaultChunkSize = 10240 + + def __init__(self, source): + """Initialises the HTMLInputStream. + + HTMLInputStream(source, [encoding]) -> Normalized stream from source + for use by html5lib. + + source can be either a file-object, local filename or a string. + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + """ + + if not _utils.supports_lone_surrogates: + # Such platforms will have already checked for such + # surrogate errors, so no need to do this checking. + self.reportCharacterErrors = None + elif len("\U0010FFFF") == 1: + self.reportCharacterErrors = self.characterErrorsUCS4 + else: + self.reportCharacterErrors = self.characterErrorsUCS2 + + # List of where new lines occur + self.newLines = [0] + + self.charEncoding = (lookupEncoding("utf-8"), "certain") + self.dataStream = self.openStream(source) + + self.reset() + + def reset(self): + self.chunk = "" + self.chunkSize = 0 + self.chunkOffset = 0 + self.errors = [] + + # number of (complete) lines in previous chunks + self.prevNumLines = 0 + # number of columns in the last line of the previous chunk + self.prevNumCols = 0 + + # Deal with CR LF and surrogates split over chunk boundaries + self._bufferedCharacter = None + + def openStream(self, source): + """Produces a file object from source. + + source can be either a file object, local filename or a string. + + """ + # Already a file object + if hasattr(source, 'read'): + stream = source + else: + stream = StringIO(source) + + return stream + + def _position(self, offset): + chunk = self.chunk + nLines = chunk.count('\n', 0, offset) + positionLine = self.prevNumLines + nLines + lastLinePos = chunk.rfind('\n', 0, offset) + if lastLinePos == -1: + positionColumn = self.prevNumCols + offset + else: + positionColumn = offset - (lastLinePos + 1) + return (positionLine, positionColumn) + + def position(self): + """Returns (line, col) of the current position in the stream.""" + line, col = self._position(self.chunkOffset) + return (line + 1, col) + + def char(self): + """ Read one character from the stream or queue if available. Return + EOF when EOF is reached. + """ + # Read a new chunk from the input stream if necessary + if self.chunkOffset >= self.chunkSize: + if not self.readChunk(): + return EOF + + chunkOffset = self.chunkOffset + char = self.chunk[chunkOffset] + self.chunkOffset = chunkOffset + 1 + + return char + + def readChunk(self, chunkSize=None): + if chunkSize is None: + chunkSize = self._defaultChunkSize + + self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) + + self.chunk = "" + self.chunkSize = 0 + self.chunkOffset = 0 + + data = self.dataStream.read(chunkSize) + + # Deal with CR LF and surrogates broken across chunks + if self._bufferedCharacter: + data = self._bufferedCharacter + data + self._bufferedCharacter = None + elif not data: + # We have no more data, bye-bye stream + return False + + if len(data) > 1: + lastv = ord(data[-1]) + if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: + self._bufferedCharacter = data[-1] + data = data[:-1] + + if self.reportCharacterErrors: + self.reportCharacterErrors(data) + + # Replace invalid characters + data = data.replace("\r\n", "\n") + data = data.replace("\r", "\n") + + self.chunk = data + self.chunkSize = len(data) + + return True + + def characterErrorsUCS4(self, data): + for _ in range(len(invalid_unicode_re.findall(data))): + self.errors.append("invalid-codepoint") + + def characterErrorsUCS2(self, data): + # Someone picked the wrong compile option + # You lose + skip = False + for match in invalid_unicode_re.finditer(data): + if skip: + continue + codepoint = ord(match.group()) + pos = match.start() + # Pretty sure there should be endianness issues here + if _utils.isSurrogatePair(data[pos:pos + 2]): + # We have a surrogate pair! + char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) + if char_val in non_bmp_invalid_codepoints: + self.errors.append("invalid-codepoint") + skip = True + elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and + pos == len(data) - 1): + self.errors.append("invalid-codepoint") + else: + skip = False + self.errors.append("invalid-codepoint") + + def charsUntil(self, characters, opposite=False): + """ Returns a string of characters from the stream up to but not + including any character in 'characters' or EOF. 'characters' must be + a container that supports the 'in' method and iteration over its + characters. + """ + + # Use a cache of regexps to find the required characters + try: + chars = charsUntilRegEx[(characters, opposite)] + except KeyError: + if __debug__: + for c in characters: + assert(ord(c) < 128) + regex = "".join(["\\x%02x" % ord(c) for c in characters]) + if not opposite: + regex = "^%s" % regex + chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) + + rv = [] + + while True: + # Find the longest matching prefix + m = chars.match(self.chunk, self.chunkOffset) + if m is None: + # If nothing matched, and it wasn't because we ran out of chunk, + # then stop + if self.chunkOffset != self.chunkSize: + break + else: + end = m.end() + # If not the whole chunk matched, return everything + # up to the part that didn't match + if end != self.chunkSize: + rv.append(self.chunk[self.chunkOffset:end]) + self.chunkOffset = end + break + # If the whole remainder of the chunk matched, + # use it all and read the next chunk + rv.append(self.chunk[self.chunkOffset:]) + if not self.readChunk(): + # Reached EOF + break + + r = "".join(rv) + return r + + def unget(self, char): + # Only one character is allowed to be ungotten at once - it must + # be consumed again before any further call to unget + if char is not None: + if self.chunkOffset == 0: + # unget is called quite rarely, so it's a good idea to do + # more work here if it saves a bit of work in the frequently + # called char and charsUntil. + # So, just prepend the ungotten character onto the current + # chunk: + self.chunk = char + self.chunk + self.chunkSize += 1 + else: + self.chunkOffset -= 1 + assert self.chunk[self.chunkOffset] == char + + +class HTMLBinaryInputStream(HTMLUnicodeInputStream): + """Provides a unicode stream of characters to the HTMLTokenizer. + + This class takes care of character encoding and removing or replacing + incorrect byte-sequences and also provides column and line tracking. + + """ + + def __init__(self, source, override_encoding=None, transport_encoding=None, + same_origin_parent_encoding=None, likely_encoding=None, + default_encoding="windows-1252", useChardet=True): + """Initialises the HTMLInputStream. + + HTMLInputStream(source, [encoding]) -> Normalized stream from source + for use by html5lib. + + source can be either a file-object, local filename or a string. + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + """ + # Raw Stream - for unicode objects this will encode to utf-8 and set + # self.charEncoding as appropriate + self.rawStream = self.openStream(source) + + HTMLUnicodeInputStream.__init__(self, self.rawStream) + + # Encoding Information + # Number of bytes to use when looking for a meta element with + # encoding information + self.numBytesMeta = 1024 + # Number of bytes to use when using detecting encoding using chardet + self.numBytesChardet = 100 + # Things from args + self.override_encoding = override_encoding + self.transport_encoding = transport_encoding + self.same_origin_parent_encoding = same_origin_parent_encoding + self.likely_encoding = likely_encoding + self.default_encoding = default_encoding + + # Determine encoding + self.charEncoding = self.determineEncoding(useChardet) + assert self.charEncoding[0] is not None + + # Call superclass + self.reset() + + def reset(self): + self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') + HTMLUnicodeInputStream.reset(self) + + def openStream(self, source): + """Produces a file object from source. + + source can be either a file object, local filename or a string. + + """ + # Already a file object + if hasattr(source, 'read'): + stream = source + else: + stream = BytesIO(source) + + try: + stream.seek(stream.tell()) + except: # pylint:disable=bare-except + stream = BufferedStream(stream) + + return stream + + def determineEncoding(self, chardet=True): + # BOMs take precedence over everything + # This will also read past the BOM if present + charEncoding = self.detectBOM(), "certain" + if charEncoding[0] is not None: + return charEncoding + + # If we've been overriden, we've been overriden + charEncoding = lookupEncoding(self.override_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Now check the transport layer + charEncoding = lookupEncoding(self.transport_encoding), "certain" + if charEncoding[0] is not None: + return charEncoding + + # Look for meta elements with encoding information + charEncoding = self.detectEncodingMeta(), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Parent document encoding + charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" + if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): + return charEncoding + + # "likely" encoding + charEncoding = lookupEncoding(self.likely_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Guess with chardet, if available + if chardet: + try: + from pip._vendor.chardet.universaldetector import UniversalDetector + except ImportError: + pass + else: + buffers = [] + detector = UniversalDetector() + while not detector.done: + buffer = self.rawStream.read(self.numBytesChardet) + assert isinstance(buffer, bytes) + if not buffer: + break + buffers.append(buffer) + detector.feed(buffer) + detector.close() + encoding = lookupEncoding(detector.result['encoding']) + self.rawStream.seek(0) + if encoding is not None: + return encoding, "tentative" + + # Try the default encoding + charEncoding = lookupEncoding(self.default_encoding), "tentative" + if charEncoding[0] is not None: + return charEncoding + + # Fallback to html5lib's default if even that hasn't worked + return lookupEncoding("windows-1252"), "tentative" + + def changeEncoding(self, newEncoding): + assert self.charEncoding[1] != "certain" + newEncoding = lookupEncoding(newEncoding) + if newEncoding is None: + return + if newEncoding.name in ("utf-16be", "utf-16le"): + newEncoding = lookupEncoding("utf-8") + assert newEncoding is not None + elif newEncoding == self.charEncoding[0]: + self.charEncoding = (self.charEncoding[0], "certain") + else: + self.rawStream.seek(0) + self.charEncoding = (newEncoding, "certain") + self.reset() + raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) + + def detectBOM(self): + """Attempts to detect at BOM at the start of the stream. If + an encoding can be determined from the BOM return the name of the + encoding otherwise return None""" + bomDict = { + codecs.BOM_UTF8: 'utf-8', + codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', + codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' + } + + # Go to beginning of file and read in 4 bytes + string = self.rawStream.read(4) + assert isinstance(string, bytes) + + # Try detecting the BOM using bytes from the string + encoding = bomDict.get(string[:3]) # UTF-8 + seek = 3 + if not encoding: + # Need to detect UTF-32 before UTF-16 + encoding = bomDict.get(string) # UTF-32 + seek = 4 + if not encoding: + encoding = bomDict.get(string[:2]) # UTF-16 + seek = 2 + + # Set the read position past the BOM if one was found, otherwise + # set it to the start of the stream + if encoding: + self.rawStream.seek(seek) + return lookupEncoding(encoding) + else: + self.rawStream.seek(0) + return None + + def detectEncodingMeta(self): + """Report the encoding declared by the meta element + """ + buffer = self.rawStream.read(self.numBytesMeta) + assert isinstance(buffer, bytes) + parser = EncodingParser(buffer) + self.rawStream.seek(0) + encoding = parser.getEncoding() + + if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): + encoding = lookupEncoding("utf-8") + + return encoding + + +class EncodingBytes(bytes): + """String-like object with an associated position and various extra methods + If the position is ever greater than the string length then an exception is + raised""" + def __new__(self, value): + assert isinstance(value, bytes) + return bytes.__new__(self, value.lower()) + + def __init__(self, value): + # pylint:disable=unused-argument + self._position = -1 + + def __iter__(self): + return self + + def __next__(self): + p = self._position = self._position + 1 + if p >= len(self): + raise StopIteration + elif p < 0: + raise TypeError + return self[p:p + 1] + + def next(self): + # Py2 compat + return self.__next__() + + def previous(self): + p = self._position + if p >= len(self): + raise StopIteration + elif p < 0: + raise TypeError + self._position = p = p - 1 + return self[p:p + 1] + + def setPosition(self, position): + if self._position >= len(self): + raise StopIteration + self._position = position + + def getPosition(self): + if self._position >= len(self): + raise StopIteration + if self._position >= 0: + return self._position + else: + return None + + position = property(getPosition, setPosition) + + def getCurrentByte(self): + return self[self.position:self.position + 1] + + currentByte = property(getCurrentByte) + + def skip(self, chars=spaceCharactersBytes): + """Skip past a list of characters""" + p = self.position # use property for the error-checking + while p < len(self): + c = self[p:p + 1] + if c not in chars: + self._position = p + return c + p += 1 + self._position = p + return None + + def skipUntil(self, chars): + p = self.position + while p < len(self): + c = self[p:p + 1] + if c in chars: + self._position = p + return c + p += 1 + self._position = p + return None + + def matchBytes(self, bytes): + """Look for a sequence of bytes at the start of a string. If the bytes + are found return True and advance the position to the byte after the + match. Otherwise return False and leave the position alone""" + p = self.position + data = self[p:p + len(bytes)] + rv = data.startswith(bytes) + if rv: + self.position += len(bytes) + return rv + + def jumpTo(self, bytes): + """Look for the next sequence of bytes matching a given sequence. If + a match is found advance the position to the last byte of the match""" + newPosition = self[self.position:].find(bytes) + if newPosition > -1: + # XXX: This is ugly, but I can't see a nicer way to fix this. + if self._position == -1: + self._position = 0 + self._position += (newPosition + len(bytes) - 1) + return True + else: + raise StopIteration + + +class EncodingParser(object): + """Mini parser for detecting character encoding from meta elements""" + + def __init__(self, data): + """string - the data to work on for encoding detection""" + self.data = EncodingBytes(data) + self.encoding = None + + def getEncoding(self): + methodDispatch = ( + (b"<!--", self.handleComment), + (b"<meta", self.handleMeta), + (b"</", self.handlePossibleEndTag), + (b"<!", self.handleOther), + (b"<?", self.handleOther), + (b"<", self.handlePossibleStartTag)) + for _ in self.data: + keepParsing = True + for key, method in methodDispatch: + if self.data.matchBytes(key): + try: + keepParsing = method() + break + except StopIteration: + keepParsing = False + break + if not keepParsing: + break + + return self.encoding + + def handleComment(self): + """Skip over comments""" + return self.data.jumpTo(b"-->") + + def handleMeta(self): + if self.data.currentByte not in spaceCharactersBytes: + # if we have <meta not followed by a space so just keep going + return True + # We have a valid meta element we want to search for attributes + hasPragma = False + pendingEncoding = None + while True: + # Try to find the next attribute after the current position + attr = self.getAttribute() + if attr is None: + return True + else: + if attr[0] == b"http-equiv": + hasPragma = attr[1] == b"content-type" + if hasPragma and pendingEncoding is not None: + self.encoding = pendingEncoding + return False + elif attr[0] == b"charset": + tentativeEncoding = attr[1] + codec = lookupEncoding(tentativeEncoding) + if codec is not None: + self.encoding = codec + return False + elif attr[0] == b"content": + contentParser = ContentAttrParser(EncodingBytes(attr[1])) + tentativeEncoding = contentParser.parse() + if tentativeEncoding is not None: + codec = lookupEncoding(tentativeEncoding) + if codec is not None: + if hasPragma: + self.encoding = codec + return False + else: + pendingEncoding = codec + + def handlePossibleStartTag(self): + return self.handlePossibleTag(False) + + def handlePossibleEndTag(self): + next(self.data) + return self.handlePossibleTag(True) + + def handlePossibleTag(self, endTag): + data = self.data + if data.currentByte not in asciiLettersBytes: + # If the next byte is not an ascii letter either ignore this + # fragment (possible start tag case) or treat it according to + # handleOther + if endTag: + data.previous() + self.handleOther() + return True + + c = data.skipUntil(spacesAngleBrackets) + if c == b"<": + # return to the first step in the overall "two step" algorithm + # reprocessing the < byte + data.previous() + else: + # Read all attributes + attr = self.getAttribute() + while attr is not None: + attr = self.getAttribute() + return True + + def handleOther(self): + return self.data.jumpTo(b">") + + def getAttribute(self): + """Return a name,value pair for the next attribute in the stream, + if one is found, or None""" + data = self.data + # Step 1 (skip chars) + c = data.skip(spaceCharactersBytes | frozenset([b"/"])) + assert c is None or len(c) == 1 + # Step 2 + if c in (b">", None): + return None + # Step 3 + attrName = [] + attrValue = [] + # Step 4 attribute name + while True: + if c == b"=" and attrName: + break + elif c in spaceCharactersBytes: + # Step 6! + c = data.skip() + break + elif c in (b"/", b">"): + return b"".join(attrName), b"" + elif c in asciiUppercaseBytes: + attrName.append(c.lower()) + elif c is None: + return None + else: + attrName.append(c) + # Step 5 + c = next(data) + # Step 7 + if c != b"=": + data.previous() + return b"".join(attrName), b"" + # Step 8 + next(data) + # Step 9 + c = data.skip() + # Step 10 + if c in (b"'", b'"'): + # 10.1 + quoteChar = c + while True: + # 10.2 + c = next(data) + # 10.3 + if c == quoteChar: + next(data) + return b"".join(attrName), b"".join(attrValue) + # 10.4 + elif c in asciiUppercaseBytes: + attrValue.append(c.lower()) + # 10.5 + else: + attrValue.append(c) + elif c == b">": + return b"".join(attrName), b"" + elif c in asciiUppercaseBytes: + attrValue.append(c.lower()) + elif c is None: + return None + else: + attrValue.append(c) + # Step 11 + while True: + c = next(data) + if c in spacesAngleBrackets: + return b"".join(attrName), b"".join(attrValue) + elif c in asciiUppercaseBytes: + attrValue.append(c.lower()) + elif c is None: + return None + else: + attrValue.append(c) + + +class ContentAttrParser(object): + def __init__(self, data): + assert isinstance(data, bytes) + self.data = data + + def parse(self): + try: + # Check if the attr name is charset + # otherwise return + self.data.jumpTo(b"charset") + self.data.position += 1 + self.data.skip() + if not self.data.currentByte == b"=": + # If there is no = sign keep looking for attrs + return None + self.data.position += 1 + self.data.skip() + # Look for an encoding between matching quote marks + if self.data.currentByte in (b'"', b"'"): + quoteMark = self.data.currentByte + self.data.position += 1 + oldPosition = self.data.position + if self.data.jumpTo(quoteMark): + return self.data[oldPosition:self.data.position] + else: + return None + else: + # Unquoted value + oldPosition = self.data.position + try: + self.data.skipUntil(spaceCharactersBytes) + return self.data[oldPosition:self.data.position] + except StopIteration: + # Return the whole remaining value + return self.data[oldPosition:] + except StopIteration: + return None + + +def lookupEncoding(encoding): + """Return the python codec name corresponding to an encoding or None if the + string doesn't correspond to a valid encoding.""" + if isinstance(encoding, binary_type): + try: + encoding = encoding.decode("ascii") + except UnicodeDecodeError: + return None + + if encoding is not None: + try: + return webencodings.lookup(encoding) + except AttributeError: + return None + else: + return None diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_tokenizer.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_tokenizer.py new file mode 100644 index 00000000..178f6e7f --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_tokenizer.py @@ -0,0 +1,1721 @@ +from __future__ import absolute_import, division, unicode_literals + +from pip._vendor.six import unichr as chr + +from collections import deque + +from .constants import spaceCharacters +from .constants import entities +from .constants import asciiLetters, asciiUpper2Lower +from .constants import digits, hexDigits, EOF +from .constants import tokenTypes, tagTokenTypes +from .constants import replacementCharacters + +from ._inputstream import HTMLInputStream + +from ._trie import Trie + +entitiesTrie = Trie(entities) + + +class HTMLTokenizer(object): + """ This class takes care of tokenizing HTML. + + * self.currentToken + Holds the token that is currently being processed. + + * self.state + Holds a reference to the method to be invoked... XXX + + * self.stream + Points to HTMLInputStream object. + """ + + def __init__(self, stream, parser=None, **kwargs): + + self.stream = HTMLInputStream(stream, **kwargs) + self.parser = parser + + # Setup the initial tokenizer state + self.escapeFlag = False + self.lastFourChars = [] + self.state = self.dataState + self.escape = False + + # The current token being created + self.currentToken = None + super(HTMLTokenizer, self).__init__() + + def __iter__(self): + """ This is where the magic happens. + + We do our usually processing through the states and when we have a token + to return we yield the token which pauses processing until the next token + is requested. + """ + self.tokenQueue = deque([]) + # Start processing. When EOF is reached self.state will return False + # instead of True and the loop will terminate. + while self.state(): + while self.stream.errors: + yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} + while self.tokenQueue: + yield self.tokenQueue.popleft() + + def consumeNumberEntity(self, isHex): + """This function returns either U+FFFD or the character based on the + decimal or hexadecimal representation. It also discards ";" if present. + If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. + """ + + allowed = digits + radix = 10 + if isHex: + allowed = hexDigits + radix = 16 + + charStack = [] + + # Consume all the characters that are in range while making sure we + # don't hit an EOF. + c = self.stream.char() + while c in allowed and c is not EOF: + charStack.append(c) + c = self.stream.char() + + # Convert the set of characters consumed to an int. + charAsInt = int("".join(charStack), radix) + + # Certain characters get replaced with others + if charAsInt in replacementCharacters: + char = replacementCharacters[charAsInt] + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + elif ((0xD800 <= charAsInt <= 0xDFFF) or + (charAsInt > 0x10FFFF)): + char = "\uFFFD" + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + else: + # Should speed up this check somehow (e.g. move the set to a constant) + if ((0x0001 <= charAsInt <= 0x0008) or + (0x000E <= charAsInt <= 0x001F) or + (0x007F <= charAsInt <= 0x009F) or + (0xFDD0 <= charAsInt <= 0xFDEF) or + charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, + 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, + 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, + 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, + 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, + 0xFFFFF, 0x10FFFE, 0x10FFFF])): + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + try: + # Try/except needed as UCS-2 Python builds' unichar only works + # within the BMP. + char = chr(charAsInt) + except ValueError: + v = charAsInt - 0x10000 + char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) + + # Discard the ; if present. Otherwise, put it back on the queue and + # invoke parseError on parser. + if c != ";": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "numeric-entity-without-semicolon"}) + self.stream.unget(c) + + return char + + def consumeEntity(self, allowedChar=None, fromAttribute=False): + # Initialise to the default output for when no entity is matched + output = "&" + + charStack = [self.stream.char()] + if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or + (allowedChar is not None and allowedChar == charStack[0])): + self.stream.unget(charStack[0]) + + elif charStack[0] == "#": + # Read the next character to see if it's hex or decimal + hex = False + charStack.append(self.stream.char()) + if charStack[-1] in ("x", "X"): + hex = True + charStack.append(self.stream.char()) + + # charStack[-1] should be the first digit + if (hex and charStack[-1] in hexDigits) \ + or (not hex and charStack[-1] in digits): + # At least one digit found, so consume the whole number + self.stream.unget(charStack[-1]) + output = self.consumeNumberEntity(hex) + else: + # No digits found + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "expected-numeric-entity"}) + self.stream.unget(charStack.pop()) + output = "&" + "".join(charStack) + + else: + # At this point in the process might have named entity. Entities + # are stored in the global variable "entities". + # + # Consume characters and compare to these to a substring of the + # entity names in the list until the substring no longer matches. + while (charStack[-1] is not EOF): + if not entitiesTrie.has_keys_with_prefix("".join(charStack)): + break + charStack.append(self.stream.char()) + + # At this point we have a string that starts with some characters + # that may match an entity + # Try to find the longest entity the string will match to take care + # of ¬i for instance. + try: + entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) + entityLength = len(entityName) + except KeyError: + entityName = None + + if entityName is not None: + if entityName[-1] != ";": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "named-entity-without-semicolon"}) + if (entityName[-1] != ";" and fromAttribute and + (charStack[entityLength] in asciiLetters or + charStack[entityLength] in digits or + charStack[entityLength] == "=")): + self.stream.unget(charStack.pop()) + output = "&" + "".join(charStack) + else: + output = entities[entityName] + self.stream.unget(charStack.pop()) + output += "".join(charStack[entityLength:]) + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-named-entity"}) + self.stream.unget(charStack.pop()) + output = "&" + "".join(charStack) + + if fromAttribute: + self.currentToken["data"][-1][1] += output + else: + if output in spaceCharacters: + tokenType = "SpaceCharacters" + else: + tokenType = "Characters" + self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) + + def processEntityInAttribute(self, allowedChar): + """This method replaces the need for "entityInAttributeValueState". + """ + self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) + + def emitCurrentToken(self): + """This method is a generic handler for emitting the tags. It also sets + the state to "data" because that's what's needed after a token has been + emitted. + """ + token = self.currentToken + # Add token to the queue to be yielded + if (token["type"] in tagTokenTypes): + token["name"] = token["name"].translate(asciiUpper2Lower) + if token["type"] == tokenTypes["EndTag"]: + if token["data"]: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "attributes-in-end-tag"}) + if token["selfClosing"]: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "self-closing-flag-on-end-tag"}) + self.tokenQueue.append(token) + self.state = self.dataState + + # Below are the various tokenizer states worked out. + def dataState(self): + data = self.stream.char() + if data == "&": + self.state = self.entityDataState + elif data == "<": + self.state = self.tagOpenState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\u0000"}) + elif data is EOF: + # Tokenization ends. + return False + elif data in spaceCharacters: + # Directly after emitting a token you switch back to the "data + # state". At that point spaceCharacters are important so they are + # emitted separately. + self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": + data + self.stream.charsUntil(spaceCharacters, True)}) + # No need to update lastFourChars here, since the first space will + # have already been appended to lastFourChars and will have broken + # any <!-- or --> sequences + else: + chars = self.stream.charsUntil(("&", "<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def entityDataState(self): + self.consumeEntity() + self.state = self.dataState + return True + + def rcdataState(self): + data = self.stream.char() + if data == "&": + self.state = self.characterReferenceInRcdata + elif data == "<": + self.state = self.rcdataLessThanSignState + elif data == EOF: + # Tokenization ends. + return False + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data in spaceCharacters: + # Directly after emitting a token you switch back to the "data + # state". At that point spaceCharacters are important so they are + # emitted separately. + self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": + data + self.stream.charsUntil(spaceCharacters, True)}) + # No need to update lastFourChars here, since the first space will + # have already been appended to lastFourChars and will have broken + # any <!-- or --> sequences + else: + chars = self.stream.charsUntil(("&", "<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def characterReferenceInRcdata(self): + self.consumeEntity() + self.state = self.rcdataState + return True + + def rawtextState(self): + data = self.stream.char() + if data == "<": + self.state = self.rawtextLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + # Tokenization ends. + return False + else: + chars = self.stream.charsUntil(("<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def scriptDataState(self): + data = self.stream.char() + if data == "<": + self.state = self.scriptDataLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + # Tokenization ends. + return False + else: + chars = self.stream.charsUntil(("<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def plaintextState(self): + data = self.stream.char() + if data == EOF: + # Tokenization ends. + return False + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + self.stream.charsUntil("\u0000")}) + return True + + def tagOpenState(self): + data = self.stream.char() + if data == "!": + self.state = self.markupDeclarationOpenState + elif data == "/": + self.state = self.closeTagOpenState + elif data in asciiLetters: + self.currentToken = {"type": tokenTypes["StartTag"], + "name": data, "data": [], + "selfClosing": False, + "selfClosingAcknowledged": False} + self.state = self.tagNameState + elif data == ">": + # XXX In theory it could be something besides a tag name. But + # do we really care? + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-tag-name-but-got-right-bracket"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) + self.state = self.dataState + elif data == "?": + # XXX In theory it could be something besides a tag name. But + # do we really care? + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-tag-name-but-got-question-mark"}) + self.stream.unget(data) + self.state = self.bogusCommentState + else: + # XXX + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-tag-name"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.dataState + return True + + def closeTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.currentToken = {"type": tokenTypes["EndTag"], "name": data, + "data": [], "selfClosing": False} + self.state = self.tagNameState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-closing-tag-but-got-right-bracket"}) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-closing-tag-but-got-eof"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) + self.state = self.dataState + else: + # XXX data can be _'_... + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-closing-tag-but-got-char", + "datavars": {"data": data}}) + self.stream.unget(data) + self.state = self.bogusCommentState + return True + + def tagNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeAttributeNameState + elif data == ">": + self.emitCurrentToken() + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-tag-name"}) + self.state = self.dataState + elif data == "/": + self.state = self.selfClosingStartTagState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["name"] += "\uFFFD" + else: + self.currentToken["name"] += data + # (Don't use charsUntil here, because tag names are + # very short and it's faster to not do anything fancy) + return True + + def rcdataLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.temporaryBuffer = "" + self.state = self.rcdataEndTagOpenState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.rcdataState + return True + + def rcdataEndTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.temporaryBuffer += data + self.state = self.rcdataEndTagNameState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) + self.stream.unget(data) + self.state = self.rcdataState + return True + + def rcdataEndTagNameState(self): + appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() + data = self.stream.char() + if data in spaceCharacters and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.beforeAttributeNameState + elif data == "/" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.selfClosingStartTagState + elif data == ">" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "</" + self.temporaryBuffer}) + self.stream.unget(data) + self.state = self.rcdataState + return True + + def rawtextLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.temporaryBuffer = "" + self.state = self.rawtextEndTagOpenState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.rawtextState + return True + + def rawtextEndTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.temporaryBuffer += data + self.state = self.rawtextEndTagNameState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) + self.stream.unget(data) + self.state = self.rawtextState + return True + + def rawtextEndTagNameState(self): + appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() + data = self.stream.char() + if data in spaceCharacters and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.beforeAttributeNameState + elif data == "/" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.selfClosingStartTagState + elif data == ">" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "</" + self.temporaryBuffer}) + self.stream.unget(data) + self.state = self.rawtextState + return True + + def scriptDataLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.temporaryBuffer = "" + self.state = self.scriptDataEndTagOpenState + elif data == "!": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"}) + self.state = self.scriptDataEscapeStartState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.scriptDataState + return True + + def scriptDataEndTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.temporaryBuffer += data + self.state = self.scriptDataEndTagNameState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) + self.stream.unget(data) + self.state = self.scriptDataState + return True + + def scriptDataEndTagNameState(self): + appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() + data = self.stream.char() + if data in spaceCharacters and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.beforeAttributeNameState + elif data == "/" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.selfClosingStartTagState + elif data == ">" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "</" + self.temporaryBuffer}) + self.stream.unget(data) + self.state = self.scriptDataState + return True + + def scriptDataEscapeStartState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataEscapeStartDashState + else: + self.stream.unget(data) + self.state = self.scriptDataState + return True + + def scriptDataEscapeStartDashState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataEscapedDashDashState + else: + self.stream.unget(data) + self.state = self.scriptDataState + return True + + def scriptDataEscapedState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataEscapedDashState + elif data == "<": + self.state = self.scriptDataEscapedLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + self.state = self.dataState + else: + chars = self.stream.charsUntil(("<", "-", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + return True + + def scriptDataEscapedDashState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataEscapedDashDashState + elif data == "<": + self.state = self.scriptDataEscapedLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataEscapedState + elif data == EOF: + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataEscapedState + return True + + def scriptDataEscapedDashDashState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + elif data == "<": + self.state = self.scriptDataEscapedLessThanSignState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) + self.state = self.scriptDataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataEscapedState + elif data == EOF: + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataEscapedState + return True + + def scriptDataEscapedLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.temporaryBuffer = "" + self.state = self.scriptDataEscapedEndTagOpenState + elif data in asciiLetters: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) + self.temporaryBuffer = data + self.state = self.scriptDataDoubleEscapeStartState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.stream.unget(data) + self.state = self.scriptDataEscapedState + return True + + def scriptDataEscapedEndTagOpenState(self): + data = self.stream.char() + if data in asciiLetters: + self.temporaryBuffer = data + self.state = self.scriptDataEscapedEndTagNameState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) + self.stream.unget(data) + self.state = self.scriptDataEscapedState + return True + + def scriptDataEscapedEndTagNameState(self): + appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() + data = self.stream.char() + if data in spaceCharacters and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.beforeAttributeNameState + elif data == "/" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.state = self.selfClosingStartTagState + elif data == ">" and appropriate: + self.currentToken = {"type": tokenTypes["EndTag"], + "name": self.temporaryBuffer, + "data": [], "selfClosing": False} + self.emitCurrentToken() + self.state = self.dataState + elif data in asciiLetters: + self.temporaryBuffer += data + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "</" + self.temporaryBuffer}) + self.stream.unget(data) + self.state = self.scriptDataEscapedState + return True + + def scriptDataDoubleEscapeStartState(self): + data = self.stream.char() + if data in (spaceCharacters | frozenset(("/", ">"))): + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + if self.temporaryBuffer.lower() == "script": + self.state = self.scriptDataDoubleEscapedState + else: + self.state = self.scriptDataEscapedState + elif data in asciiLetters: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.temporaryBuffer += data + else: + self.stream.unget(data) + self.state = self.scriptDataEscapedState + return True + + def scriptDataDoubleEscapedState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataDoubleEscapedDashState + elif data == "<": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.state = self.scriptDataDoubleEscapedLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-script-in-script"}) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + return True + + def scriptDataDoubleEscapedDashState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + self.state = self.scriptDataDoubleEscapedDashDashState + elif data == "<": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.state = self.scriptDataDoubleEscapedLessThanSignState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataDoubleEscapedState + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-script-in-script"}) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataDoubleEscapedState + return True + + def scriptDataDoubleEscapedDashDashState(self): + data = self.stream.char() + if data == "-": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) + elif data == "<": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) + self.state = self.scriptDataDoubleEscapedLessThanSignState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) + self.state = self.scriptDataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) + self.state = self.scriptDataDoubleEscapedState + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-script-in-script"}) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.state = self.scriptDataDoubleEscapedState + return True + + def scriptDataDoubleEscapedLessThanSignState(self): + data = self.stream.char() + if data == "/": + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) + self.temporaryBuffer = "" + self.state = self.scriptDataDoubleEscapeEndState + else: + self.stream.unget(data) + self.state = self.scriptDataDoubleEscapedState + return True + + def scriptDataDoubleEscapeEndState(self): + data = self.stream.char() + if data in (spaceCharacters | frozenset(("/", ">"))): + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + if self.temporaryBuffer.lower() == "script": + self.state = self.scriptDataEscapedState + else: + self.state = self.scriptDataDoubleEscapedState + elif data in asciiLetters: + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) + self.temporaryBuffer += data + else: + self.stream.unget(data) + self.state = self.scriptDataDoubleEscapedState + return True + + def beforeAttributeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.stream.charsUntil(spaceCharacters, True) + elif data in asciiLetters: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data == ">": + self.emitCurrentToken() + elif data == "/": + self.state = self.selfClosingStartTagState + elif data in ("'", '"', "=", "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "invalid-character-in-attribute-name"}) + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"].append(["\uFFFD", ""]) + self.state = self.attributeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-attribute-name-but-got-eof"}) + self.state = self.dataState + else: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + return True + + def attributeNameState(self): + data = self.stream.char() + leavingThisState = True + emitToken = False + if data == "=": + self.state = self.beforeAttributeValueState + elif data in asciiLetters: + self.currentToken["data"][-1][0] += data +\ + self.stream.charsUntil(asciiLetters, True) + leavingThisState = False + elif data == ">": + # XXX If we emit here the attributes are converted to a dict + # without being checked and when the code below runs we error + # because data is a dict not a list + emitToken = True + elif data in spaceCharacters: + self.state = self.afterAttributeNameState + elif data == "/": + self.state = self.selfClosingStartTagState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][0] += "\uFFFD" + leavingThisState = False + elif data in ("'", '"', "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": + "invalid-character-in-attribute-name"}) + self.currentToken["data"][-1][0] += data + leavingThisState = False + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "eof-in-attribute-name"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][0] += data + leavingThisState = False + + if leavingThisState: + # Attributes are not dropped at this stage. That happens when the + # start tag token is emitted so values can still be safely appended + # to attributes, but we do want to report the parse error in time. + self.currentToken["data"][-1][0] = ( + self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) + for name, _ in self.currentToken["data"][:-1]: + if self.currentToken["data"][-1][0] == name: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "duplicate-attribute"}) + break + # XXX Fix for above XXX + if emitToken: + self.emitCurrentToken() + return True + + def afterAttributeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.stream.charsUntil(spaceCharacters, True) + elif data == "=": + self.state = self.beforeAttributeValueState + elif data == ">": + self.emitCurrentToken() + elif data in asciiLetters: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data == "/": + self.state = self.selfClosingStartTagState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"].append(["\uFFFD", ""]) + self.state = self.attributeNameState + elif data in ("'", '"', "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "invalid-character-after-attribute-name"}) + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-end-of-tag-but-got-eof"}) + self.state = self.dataState + else: + self.currentToken["data"].append([data, ""]) + self.state = self.attributeNameState + return True + + def beforeAttributeValueState(self): + data = self.stream.char() + if data in spaceCharacters: + self.stream.charsUntil(spaceCharacters, True) + elif data == "\"": + self.state = self.attributeValueDoubleQuotedState + elif data == "&": + self.state = self.attributeValueUnQuotedState + self.stream.unget(data) + elif data == "'": + self.state = self.attributeValueSingleQuotedState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-attribute-value-but-got-right-bracket"}) + self.emitCurrentToken() + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + self.state = self.attributeValueUnQuotedState + elif data in ("=", "<", "`"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "equals-in-unquoted-attribute-value"}) + self.currentToken["data"][-1][1] += data + self.state = self.attributeValueUnQuotedState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-attribute-value-but-got-eof"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data + self.state = self.attributeValueUnQuotedState + return True + + def attributeValueDoubleQuotedState(self): + data = self.stream.char() + if data == "\"": + self.state = self.afterAttributeValueState + elif data == "&": + self.processEntityInAttribute('"') + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-attribute-value-double-quote"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data +\ + self.stream.charsUntil(("\"", "&", "\u0000")) + return True + + def attributeValueSingleQuotedState(self): + data = self.stream.char() + if data == "'": + self.state = self.afterAttributeValueState + elif data == "&": + self.processEntityInAttribute("'") + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-attribute-value-single-quote"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data +\ + self.stream.charsUntil(("'", "&", "\u0000")) + return True + + def attributeValueUnQuotedState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeAttributeNameState + elif data == "&": + self.processEntityInAttribute(">") + elif data == ">": + self.emitCurrentToken() + elif data in ('"', "'", "=", "<", "`"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-character-in-unquoted-attribute-value"}) + self.currentToken["data"][-1][1] += data + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"][-1][1] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-attribute-value-no-quotes"}) + self.state = self.dataState + else: + self.currentToken["data"][-1][1] += data + self.stream.charsUntil( + frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) + return True + + def afterAttributeValueState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeAttributeNameState + elif data == ">": + self.emitCurrentToken() + elif data == "/": + self.state = self.selfClosingStartTagState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-EOF-after-attribute-value"}) + self.stream.unget(data) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-character-after-attribute-value"}) + self.stream.unget(data) + self.state = self.beforeAttributeNameState + return True + + def selfClosingStartTagState(self): + data = self.stream.char() + if data == ">": + self.currentToken["selfClosing"] = True + self.emitCurrentToken() + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": + "unexpected-EOF-after-solidus-in-tag"}) + self.stream.unget(data) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-character-after-solidus-in-tag"}) + self.stream.unget(data) + self.state = self.beforeAttributeNameState + return True + + def bogusCommentState(self): + # Make a new comment token and give it as value all the characters + # until the first > or EOF (charsUntil checks for EOF automatically) + # and emit it. + data = self.stream.charsUntil(">") + data = data.replace("\u0000", "\uFFFD") + self.tokenQueue.append( + {"type": tokenTypes["Comment"], "data": data}) + + # Eat the character directly after the bogus comment which is either a + # ">" or an EOF. + self.stream.char() + self.state = self.dataState + return True + + def markupDeclarationOpenState(self): + charStack = [self.stream.char()] + if charStack[-1] == "-": + charStack.append(self.stream.char()) + if charStack[-1] == "-": + self.currentToken = {"type": tokenTypes["Comment"], "data": ""} + self.state = self.commentStartState + return True + elif charStack[-1] in ('d', 'D'): + matched = True + for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), + ('y', 'Y'), ('p', 'P'), ('e', 'E')): + charStack.append(self.stream.char()) + if charStack[-1] not in expected: + matched = False + break + if matched: + self.currentToken = {"type": tokenTypes["Doctype"], + "name": "", + "publicId": None, "systemId": None, + "correct": True} + self.state = self.doctypeState + return True + elif (charStack[-1] == "[" and + self.parser is not None and + self.parser.tree.openElements and + self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): + matched = True + for expected in ["C", "D", "A", "T", "A", "["]: + charStack.append(self.stream.char()) + if charStack[-1] != expected: + matched = False + break + if matched: + self.state = self.cdataSectionState + return True + + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-dashes-or-doctype"}) + + while charStack: + self.stream.unget(charStack.pop()) + self.state = self.bogusCommentState + return True + + def commentStartState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentStartDashState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "incorrect-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += data + self.state = self.commentState + return True + + def commentStartDashState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentEndState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "-\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "incorrect-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += "-" + data + self.state = self.commentState + return True + + def commentState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentEndDashState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "\uFFFD" + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "eof-in-comment"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += data + \ + self.stream.charsUntil(("-", "\u0000")) + return True + + def commentEndDashState(self): + data = self.stream.char() + if data == "-": + self.state = self.commentEndState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "-\uFFFD" + self.state = self.commentState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment-end-dash"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += "-" + data + self.state = self.commentState + return True + + def commentEndState(self): + data = self.stream.char() + if data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "--\uFFFD" + self.state = self.commentState + elif data == "!": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-bang-after-double-dash-in-comment"}) + self.state = self.commentEndBangState + elif data == "-": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-dash-after-double-dash-in-comment"}) + self.currentToken["data"] += data + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment-double-dash"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + # XXX + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-comment"}) + self.currentToken["data"] += "--" + data + self.state = self.commentState + return True + + def commentEndBangState(self): + data = self.stream.char() + if data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "-": + self.currentToken["data"] += "--!" + self.state = self.commentEndDashState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["data"] += "--!\uFFFD" + self.state = self.commentState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-comment-end-bang-state"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["data"] += "--!" + data + self.state = self.commentState + return True + + def doctypeState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeDoctypeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-doctype-name-but-got-eof"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "need-space-after-doctype"}) + self.stream.unget(data) + self.state = self.beforeDoctypeNameState + return True + + def beforeDoctypeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-doctype-name-but-got-right-bracket"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["name"] = "\uFFFD" + self.state = self.doctypeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-doctype-name-but-got-eof"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["name"] = data + self.state = self.doctypeNameState + return True + + def doctypeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) + self.state = self.afterDoctypeNameState + elif data == ">": + self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["name"] += "\uFFFD" + self.state = self.doctypeNameState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype-name"}) + self.currentToken["correct"] = False + self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["name"] += data + return True + + def afterDoctypeNameState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.currentToken["correct"] = False + self.stream.unget(data) + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + if data in ("p", "P"): + matched = True + for expected in (("u", "U"), ("b", "B"), ("l", "L"), + ("i", "I"), ("c", "C")): + data = self.stream.char() + if data not in expected: + matched = False + break + if matched: + self.state = self.afterDoctypePublicKeywordState + return True + elif data in ("s", "S"): + matched = True + for expected in (("y", "Y"), ("s", "S"), ("t", "T"), + ("e", "E"), ("m", "M")): + data = self.stream.char() + if data not in expected: + matched = False + break + if matched: + self.state = self.afterDoctypeSystemKeywordState + return True + + # All the characters read before the current 'data' will be + # [a-zA-Z], so they're garbage in the bogus doctype and can be + # discarded; only the latest character might be '>' or EOF + # and needs to be ungetted + self.stream.unget(data) + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-space-or-right-bracket-in-doctype", "datavars": + {"data": data}}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + + return True + + def afterDoctypePublicKeywordState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeDoctypePublicIdentifierState + elif data in ("'", '"'): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.stream.unget(data) + self.state = self.beforeDoctypePublicIdentifierState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.stream.unget(data) + self.state = self.beforeDoctypePublicIdentifierState + return True + + def beforeDoctypePublicIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == "\"": + self.currentToken["publicId"] = "" + self.state = self.doctypePublicIdentifierDoubleQuotedState + elif data == "'": + self.currentToken["publicId"] = "" + self.state = self.doctypePublicIdentifierSingleQuotedState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def doctypePublicIdentifierDoubleQuotedState(self): + data = self.stream.char() + if data == "\"": + self.state = self.afterDoctypePublicIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["publicId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["publicId"] += data + return True + + def doctypePublicIdentifierSingleQuotedState(self): + data = self.stream.char() + if data == "'": + self.state = self.afterDoctypePublicIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["publicId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["publicId"] += data + return True + + def afterDoctypePublicIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.betweenDoctypePublicAndSystemIdentifiersState + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == '"': + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierDoubleQuotedState + elif data == "'": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierSingleQuotedState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def betweenDoctypePublicAndSystemIdentifiersState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data == '"': + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierDoubleQuotedState + elif data == "'": + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierSingleQuotedState + elif data == EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def afterDoctypeSystemKeywordState(self): + data = self.stream.char() + if data in spaceCharacters: + self.state = self.beforeDoctypeSystemIdentifierState + elif data in ("'", '"'): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.stream.unget(data) + self.state = self.beforeDoctypeSystemIdentifierState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.stream.unget(data) + self.state = self.beforeDoctypeSystemIdentifierState + return True + + def beforeDoctypeSystemIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == "\"": + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierDoubleQuotedState + elif data == "'": + self.currentToken["systemId"] = "" + self.state = self.doctypeSystemIdentifierSingleQuotedState + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.currentToken["correct"] = False + self.state = self.bogusDoctypeState + return True + + def doctypeSystemIdentifierDoubleQuotedState(self): + data = self.stream.char() + if data == "\"": + self.state = self.afterDoctypeSystemIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["systemId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["systemId"] += data + return True + + def doctypeSystemIdentifierSingleQuotedState(self): + data = self.stream.char() + if data == "'": + self.state = self.afterDoctypeSystemIdentifierState + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.currentToken["systemId"] += "\uFFFD" + elif data == ">": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-end-of-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.currentToken["systemId"] += data + return True + + def afterDoctypeSystemIdentifierState(self): + data = self.stream.char() + if data in spaceCharacters: + pass + elif data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-doctype"}) + self.currentToken["correct"] = False + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "unexpected-char-in-doctype"}) + self.state = self.bogusDoctypeState + return True + + def bogusDoctypeState(self): + data = self.stream.char() + if data == ">": + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + elif data is EOF: + # XXX EMIT + self.stream.unget(data) + self.tokenQueue.append(self.currentToken) + self.state = self.dataState + else: + pass + return True + + def cdataSectionState(self): + data = [] + while True: + data.append(self.stream.charsUntil("]")) + data.append(self.stream.charsUntil(">")) + char = self.stream.char() + if char == EOF: + break + else: + assert char == ">" + if data[-1][-2:] == "]]": + data[-1] = data[-1][:-2] + break + else: + data.append(char) + + data = "".join(data) # pylint:disable=redefined-variable-type + # Deal with null here rather than in the parser + nullCount = data.count("\u0000") + if nullCount > 0: + for _ in range(nullCount): + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + data = data.replace("\u0000", "\uFFFD") + if data: + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": data}) + self.state = self.dataState + return True diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__init__.py new file mode 100644 index 00000000..a5ba4bf1 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__init__.py @@ -0,0 +1,14 @@ +from __future__ import absolute_import, division, unicode_literals + +from .py import Trie as PyTrie + +Trie = PyTrie + +# pylint:disable=wrong-import-position +try: + from .datrie import Trie as DATrie +except ImportError: + pass +else: + Trie = DATrie +# pylint:enable=wrong-import-position diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..bb097565 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..2b98eddb --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/_base.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..f55827e6 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/datrie.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..f0d0aa03 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/__pycache__/py.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/_base.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/_base.py new file mode 100644 index 00000000..6b71975f --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/_base.py @@ -0,0 +1,40 @@ +from __future__ import absolute_import, division, unicode_literals + +try: + from collections.abc import Mapping +except ImportError: # Python 2.7 + from collections import Mapping + + +class Trie(Mapping): + """Abstract base class for tries""" + + def keys(self, prefix=None): + # pylint:disable=arguments-differ + keys = super(Trie, self).keys() + + if prefix is None: + return set(keys) + + return {x for x in keys if x.startswith(prefix)} + + def has_keys_with_prefix(self, prefix): + for key in self.keys(): + if key.startswith(prefix): + return True + + return False + + def longest_prefix(self, prefix): + if prefix in self: + return prefix + + for i in range(1, len(prefix) + 1): + if prefix[:-i] in self: + return prefix[:-i] + + raise KeyError(prefix) + + def longest_prefix_item(self, prefix): + lprefix = self.longest_prefix(prefix) + return (lprefix, self[lprefix]) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/datrie.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/datrie.py new file mode 100644 index 00000000..e2e5f866 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/datrie.py @@ -0,0 +1,44 @@ +from __future__ import absolute_import, division, unicode_literals + +from datrie import Trie as DATrie +from pip._vendor.six import text_type + +from ._base import Trie as ABCTrie + + +class Trie(ABCTrie): + def __init__(self, data): + chars = set() + for key in data.keys(): + if not isinstance(key, text_type): + raise TypeError("All keys must be strings") + for char in key: + chars.add(char) + + self._data = DATrie("".join(chars)) + for key, value in data.items(): + self._data[key] = value + + def __contains__(self, key): + return key in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + raise NotImplementedError() + + def __getitem__(self, key): + return self._data[key] + + def keys(self, prefix=None): + return self._data.keys(prefix) + + def has_keys_with_prefix(self, prefix): + return self._data.has_keys_with_prefix(prefix) + + def longest_prefix(self, prefix): + return self._data.longest_prefix(prefix) + + def longest_prefix_item(self, prefix): + return self._data.longest_prefix_item(prefix) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/py.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/py.py new file mode 100644 index 00000000..c178b219 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_trie/py.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +from bisect import bisect_left + +from ._base import Trie as ABCTrie + + +class Trie(ABCTrie): + def __init__(self, data): + if not all(isinstance(x, text_type) for x in data.keys()): + raise TypeError("All keys must be strings") + + self._data = data + self._keys = sorted(data.keys()) + self._cachestr = "" + self._cachepoints = (0, len(data)) + + def __contains__(self, key): + return key in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + def __getitem__(self, key): + return self._data[key] + + def keys(self, prefix=None): + if prefix is None or prefix == "" or not self._keys: + return set(self._keys) + + if prefix.startswith(self._cachestr): + lo, hi = self._cachepoints + start = i = bisect_left(self._keys, prefix, lo, hi) + else: + start = i = bisect_left(self._keys, prefix) + + keys = set() + if start == len(self._keys): + return keys + + while self._keys[i].startswith(prefix): + keys.add(self._keys[i]) + i += 1 + + self._cachestr = prefix + self._cachepoints = (start, i) + + return keys + + def has_keys_with_prefix(self, prefix): + if prefix in self._data: + return True + + if prefix.startswith(self._cachestr): + lo, hi = self._cachepoints + i = bisect_left(self._keys, prefix, lo, hi) + else: + i = bisect_left(self._keys, prefix) + + if i == len(self._keys): + return False + + return self._keys[i].startswith(prefix) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_utils.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_utils.py new file mode 100644 index 00000000..0703afb3 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/_utils.py @@ -0,0 +1,124 @@ +from __future__ import absolute_import, division, unicode_literals + +from types import ModuleType + +from pip._vendor.six import text_type + +try: + import xml.etree.cElementTree as default_etree +except ImportError: + import xml.etree.ElementTree as default_etree + + +__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", + "surrogatePairToCodepoint", "moduleFactoryFactory", + "supports_lone_surrogates"] + + +# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be +# caught by the below test. In general this would be any platform +# using UTF-16 as its encoding of unicode strings, such as +# Jython. This is because UTF-16 itself is based on the use of such +# surrogates, and there is no mechanism to further escape such +# escapes. +try: + _x = eval('"\\uD800"') # pylint:disable=eval-used + if not isinstance(_x, text_type): + # We need this with u"" because of http://bugs.jython.org/issue2039 + _x = eval('u"\\uD800"') # pylint:disable=eval-used + assert isinstance(_x, text_type) +except: # pylint:disable=bare-except + supports_lone_surrogates = False +else: + supports_lone_surrogates = True + + +class MethodDispatcher(dict): + """Dict with 2 special properties: + + On initiation, keys that are lists, sets or tuples are converted to + multiple keys so accessing any one of the items in the original + list-like object returns the matching value + + md = MethodDispatcher({("foo", "bar"):"baz"}) + md["foo"] == "baz" + + A default value which can be set through the default attribute. + """ + + def __init__(self, items=()): + # Using _dictEntries instead of directly assigning to self is about + # twice as fast. Please do careful performance testing before changing + # anything here. + _dictEntries = [] + for name, value in items: + if isinstance(name, (list, tuple, frozenset, set)): + for item in name: + _dictEntries.append((item, value)) + else: + _dictEntries.append((name, value)) + dict.__init__(self, _dictEntries) + assert len(self) == len(_dictEntries) + self.default = None + + def __getitem__(self, key): + return dict.get(self, key, self.default) + + +# Some utility functions to deal with weirdness around UCS2 vs UCS4 +# python builds + +def isSurrogatePair(data): + return (len(data) == 2 and + ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and + ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) + + +def surrogatePairToCodepoint(data): + char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + + (ord(data[1]) - 0xDC00)) + return char_val + +# Module Factory Factory (no, this isn't Java, I know) +# Here to stop this being duplicated all over the place. + + +def moduleFactoryFactory(factory): + moduleCache = {} + + def moduleFactory(baseModule, *args, **kwargs): + if isinstance(ModuleType.__name__, type("")): + name = "_%s_factory" % baseModule.__name__ + else: + name = b"_%s_factory" % baseModule.__name__ + + kwargs_tuple = tuple(kwargs.items()) + + try: + return moduleCache[name][args][kwargs_tuple] + except KeyError: + mod = ModuleType(name) + objs = factory(baseModule, *args, **kwargs) + mod.__dict__.update(objs) + if "name" not in moduleCache: + moduleCache[name] = {} + if "args" not in moduleCache[name]: + moduleCache[name][args] = {} + if "kwargs" not in moduleCache[name][args]: + moduleCache[name][args][kwargs_tuple] = {} + moduleCache[name][args][kwargs_tuple] = mod + return mod + + return moduleFactory + + +def memoize(func): + cache = {} + + def wrapped(*args, **kwargs): + key = (tuple(args), tuple(kwargs.items())) + if key not in cache: + cache[key] = func(*args, **kwargs) + return cache[key] + + return wrapped diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/constants.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/constants.py new file mode 100644 index 00000000..1ff80419 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/constants.py @@ -0,0 +1,2947 @@ +from __future__ import absolute_import, division, unicode_literals + +import string + +EOF = None + +E = { + "null-character": + "Null character in input stream, replaced with U+FFFD.", + "invalid-codepoint": + "Invalid codepoint in stream.", + "incorrectly-placed-solidus": + "Solidus (/) incorrectly placed in tag.", + "incorrect-cr-newline-entity": + "Incorrect CR newline entity, replaced with LF.", + "illegal-windows-1252-entity": + "Entity used with illegal number (windows-1252 reference).", + "cant-convert-numeric-entity": + "Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x).", + "illegal-codepoint-for-numeric-entity": + "Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x.", + "numeric-entity-without-semicolon": + "Numeric entity didn't end with ';'.", + "expected-numeric-entity-but-got-eof": + "Numeric entity expected. Got end of file instead.", + "expected-numeric-entity": + "Numeric entity expected but none found.", + "named-entity-without-semicolon": + "Named entity didn't end with ';'.", + "expected-named-entity": + "Named entity expected. Got none.", + "attributes-in-end-tag": + "End tag contains unexpected attributes.", + 'self-closing-flag-on-end-tag': + "End tag contains unexpected self-closing flag.", + "expected-tag-name-but-got-right-bracket": + "Expected tag name. Got '>' instead.", + "expected-tag-name-but-got-question-mark": + "Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)", + "expected-tag-name": + "Expected tag name. Got something else instead", + "expected-closing-tag-but-got-right-bracket": + "Expected closing tag. Got '>' instead. Ignoring '</>'.", + "expected-closing-tag-but-got-eof": + "Expected closing tag. Unexpected end of file.", + "expected-closing-tag-but-got-char": + "Expected closing tag. Unexpected character '%(data)s' found.", + "eof-in-tag-name": + "Unexpected end of file in the tag name.", + "expected-attribute-name-but-got-eof": + "Unexpected end of file. Expected attribute name instead.", + "eof-in-attribute-name": + "Unexpected end of file in attribute name.", + "invalid-character-in-attribute-name": + "Invalid character in attribute name", + "duplicate-attribute": + "Dropped duplicate attribute on tag.", + "expected-end-of-tag-name-but-got-eof": + "Unexpected end of file. Expected = or end of tag.", + "expected-attribute-value-but-got-eof": + "Unexpected end of file. Expected attribute value.", + "expected-attribute-value-but-got-right-bracket": + "Expected attribute value. Got '>' instead.", + 'equals-in-unquoted-attribute-value': + "Unexpected = in unquoted attribute", + 'unexpected-character-in-unquoted-attribute-value': + "Unexpected character in unquoted attribute", + "invalid-character-after-attribute-name": + "Unexpected character after attribute name.", + "unexpected-character-after-attribute-value": + "Unexpected character after attribute value.", + "eof-in-attribute-value-double-quote": + "Unexpected end of file in attribute value (\").", + "eof-in-attribute-value-single-quote": + "Unexpected end of file in attribute value (').", + "eof-in-attribute-value-no-quotes": + "Unexpected end of file in attribute value.", + "unexpected-EOF-after-solidus-in-tag": + "Unexpected end of file in tag. Expected >", + "unexpected-character-after-solidus-in-tag": + "Unexpected character after / in tag. Expected >", + "expected-dashes-or-doctype": + "Expected '--' or 'DOCTYPE'. Not found.", + "unexpected-bang-after-double-dash-in-comment": + "Unexpected ! after -- in comment", + "unexpected-space-after-double-dash-in-comment": + "Unexpected space after -- in comment", + "incorrect-comment": + "Incorrect comment.", + "eof-in-comment": + "Unexpected end of file in comment.", + "eof-in-comment-end-dash": + "Unexpected end of file in comment (-)", + "unexpected-dash-after-double-dash-in-comment": + "Unexpected '-' after '--' found in comment.", + "eof-in-comment-double-dash": + "Unexpected end of file in comment (--).", + "eof-in-comment-end-space-state": + "Unexpected end of file in comment.", + "eof-in-comment-end-bang-state": + "Unexpected end of file in comment.", + "unexpected-char-in-comment": + "Unexpected character in comment found.", + "need-space-after-doctype": + "No space after literal string 'DOCTYPE'.", + "expected-doctype-name-but-got-right-bracket": + "Unexpected > character. Expected DOCTYPE name.", + "expected-doctype-name-but-got-eof": + "Unexpected end of file. Expected DOCTYPE name.", + "eof-in-doctype-name": + "Unexpected end of file in DOCTYPE name.", + "eof-in-doctype": + "Unexpected end of file in DOCTYPE.", + "expected-space-or-right-bracket-in-doctype": + "Expected space or '>'. Got '%(data)s'", + "unexpected-end-of-doctype": + "Unexpected end of DOCTYPE.", + "unexpected-char-in-doctype": + "Unexpected character in DOCTYPE.", + "eof-in-innerhtml": + "XXX innerHTML EOF", + "unexpected-doctype": + "Unexpected DOCTYPE. Ignored.", + "non-html-root": + "html needs to be the first start tag.", + "expected-doctype-but-got-eof": + "Unexpected End of file. Expected DOCTYPE.", + "unknown-doctype": + "Erroneous DOCTYPE.", + "expected-doctype-but-got-chars": + "Unexpected non-space characters. Expected DOCTYPE.", + "expected-doctype-but-got-start-tag": + "Unexpected start tag (%(name)s). Expected DOCTYPE.", + "expected-doctype-but-got-end-tag": + "Unexpected end tag (%(name)s). Expected DOCTYPE.", + "end-tag-after-implied-root": + "Unexpected end tag (%(name)s) after the (implied) root element.", + "expected-named-closing-tag-but-got-eof": + "Unexpected end of file. Expected end tag (%(name)s).", + "two-heads-are-not-better-than-one": + "Unexpected start tag head in existing head. Ignored.", + "unexpected-end-tag": + "Unexpected end tag (%(name)s). Ignored.", + "unexpected-start-tag-out-of-my-head": + "Unexpected start tag (%(name)s) that can be in head. Moved.", + "unexpected-start-tag": + "Unexpected start tag (%(name)s).", + "missing-end-tag": + "Missing end tag (%(name)s).", + "missing-end-tags": + "Missing end tags (%(name)s).", + "unexpected-start-tag-implies-end-tag": + "Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s).", + "unexpected-start-tag-treated-as": + "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", + "deprecated-tag": + "Unexpected start tag %(name)s. Don't use it!", + "unexpected-start-tag-ignored": + "Unexpected start tag %(name)s. Ignored.", + "expected-one-end-tag-but-got-another": + "Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s).", + "end-tag-too-early": + "End tag (%(name)s) seen too early. Expected other end tag.", + "end-tag-too-early-named": + "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", + "end-tag-too-early-ignored": + "End tag (%(name)s) seen too early. Ignored.", + "adoption-agency-1.1": + "End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm.", + "adoption-agency-1.2": + "End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm.", + "adoption-agency-1.3": + "End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm.", + "adoption-agency-4.4": + "End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm.", + "unexpected-end-tag-treated-as": + "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", + "no-end-tag": + "This element (%(name)s) has no end tag.", + "unexpected-implied-end-tag-in-table": + "Unexpected implied end tag (%(name)s) in the table phase.", + "unexpected-implied-end-tag-in-table-body": + "Unexpected implied end tag (%(name)s) in the table body phase.", + "unexpected-char-implies-table-voodoo": + "Unexpected non-space characters in " + "table context caused voodoo mode.", + "unexpected-hidden-input-in-table": + "Unexpected input with type hidden in table context.", + "unexpected-form-in-table": + "Unexpected form in table context.", + "unexpected-start-tag-implies-table-voodoo": + "Unexpected start tag (%(name)s) in " + "table context caused voodoo mode.", + "unexpected-end-tag-implies-table-voodoo": + "Unexpected end tag (%(name)s) in " + "table context caused voodoo mode.", + "unexpected-cell-in-table-body": + "Unexpected table cell start tag (%(name)s) " + "in the table body phase.", + "unexpected-cell-end-tag": + "Got table cell end tag (%(name)s) " + "while required end tags are missing.", + "unexpected-end-tag-in-table-body": + "Unexpected end tag (%(name)s) in the table body phase. Ignored.", + "unexpected-implied-end-tag-in-table-row": + "Unexpected implied end tag (%(name)s) in the table row phase.", + "unexpected-end-tag-in-table-row": + "Unexpected end tag (%(name)s) in the table row phase. Ignored.", + "unexpected-select-in-select": + "Unexpected select start tag in the select phase " + "treated as select end tag.", + "unexpected-input-in-select": + "Unexpected input start tag in the select phase.", + "unexpected-start-tag-in-select": + "Unexpected start tag token (%(name)s in the select phase. " + "Ignored.", + "unexpected-end-tag-in-select": + "Unexpected end tag (%(name)s) in the select phase. Ignored.", + "unexpected-table-element-start-tag-in-select-in-table": + "Unexpected table element start tag (%(name)s) in the select in table phase.", + "unexpected-table-element-end-tag-in-select-in-table": + "Unexpected table element end tag (%(name)s) in the select in table phase.", + "unexpected-char-after-body": + "Unexpected non-space characters in the after body phase.", + "unexpected-start-tag-after-body": + "Unexpected start tag token (%(name)s)" + " in the after body phase.", + "unexpected-end-tag-after-body": + "Unexpected end tag token (%(name)s)" + " in the after body phase.", + "unexpected-char-in-frameset": + "Unexpected characters in the frameset phase. Characters ignored.", + "unexpected-start-tag-in-frameset": + "Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored.", + "unexpected-frameset-in-frameset-innerhtml": + "Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML).", + "unexpected-end-tag-in-frameset": + "Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored.", + "unexpected-char-after-frameset": + "Unexpected non-space characters in the " + "after frameset phase. Ignored.", + "unexpected-start-tag-after-frameset": + "Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored.", + "unexpected-end-tag-after-frameset": + "Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored.", + "unexpected-end-tag-after-body-innerhtml": + "Unexpected end tag after body(innerHtml)", + "expected-eof-but-got-char": + "Unexpected non-space characters. Expected end of file.", + "expected-eof-but-got-start-tag": + "Unexpected start tag (%(name)s)" + ". Expected end of file.", + "expected-eof-but-got-end-tag": + "Unexpected end tag (%(name)s)" + ". Expected end of file.", + "eof-in-table": + "Unexpected end of file. Expected table content.", + "eof-in-select": + "Unexpected end of file. Expected select content.", + "eof-in-frameset": + "Unexpected end of file. Expected frameset content.", + "eof-in-script-in-script": + "Unexpected end of file. Expected script content.", + "eof-in-foreign-lands": + "Unexpected end of file. Expected foreign content", + "non-void-element-with-trailing-solidus": + "Trailing solidus not allowed on element %(name)s", + "unexpected-html-element-in-foreign-content": + "Element %(name)s not allowed in a non-html context", + "unexpected-end-tag-before-html": + "Unexpected end tag (%(name)s) before html.", + "unexpected-inhead-noscript-tag": + "Element %(name)s not allowed in a inhead-noscript context", + "eof-in-head-noscript": + "Unexpected end of file. Expected inhead-noscript content", + "char-in-head-noscript": + "Unexpected non-space character. Expected inhead-noscript content", + "XXX-undefined-error": + "Undefined error (this sucks and should be fixed)", +} + +namespaces = { + "html": "http://www.w3.org/1999/xhtml", + "mathml": "http://www.w3.org/1998/Math/MathML", + "svg": "http://www.w3.org/2000/svg", + "xlink": "http://www.w3.org/1999/xlink", + "xml": "http://www.w3.org/XML/1998/namespace", + "xmlns": "http://www.w3.org/2000/xmlns/" +} + +scopingElements = frozenset([ + (namespaces["html"], "applet"), + (namespaces["html"], "caption"), + (namespaces["html"], "html"), + (namespaces["html"], "marquee"), + (namespaces["html"], "object"), + (namespaces["html"], "table"), + (namespaces["html"], "td"), + (namespaces["html"], "th"), + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext"), + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title"), +]) + +formattingElements = frozenset([ + (namespaces["html"], "a"), + (namespaces["html"], "b"), + (namespaces["html"], "big"), + (namespaces["html"], "code"), + (namespaces["html"], "em"), + (namespaces["html"], "font"), + (namespaces["html"], "i"), + (namespaces["html"], "nobr"), + (namespaces["html"], "s"), + (namespaces["html"], "small"), + (namespaces["html"], "strike"), + (namespaces["html"], "strong"), + (namespaces["html"], "tt"), + (namespaces["html"], "u") +]) + +specialElements = frozenset([ + (namespaces["html"], "address"), + (namespaces["html"], "applet"), + (namespaces["html"], "area"), + (namespaces["html"], "article"), + (namespaces["html"], "aside"), + (namespaces["html"], "base"), + (namespaces["html"], "basefont"), + (namespaces["html"], "bgsound"), + (namespaces["html"], "blockquote"), + (namespaces["html"], "body"), + (namespaces["html"], "br"), + (namespaces["html"], "button"), + (namespaces["html"], "caption"), + (namespaces["html"], "center"), + (namespaces["html"], "col"), + (namespaces["html"], "colgroup"), + (namespaces["html"], "command"), + (namespaces["html"], "dd"), + (namespaces["html"], "details"), + (namespaces["html"], "dir"), + (namespaces["html"], "div"), + (namespaces["html"], "dl"), + (namespaces["html"], "dt"), + (namespaces["html"], "embed"), + (namespaces["html"], "fieldset"), + (namespaces["html"], "figure"), + (namespaces["html"], "footer"), + (namespaces["html"], "form"), + (namespaces["html"], "frame"), + (namespaces["html"], "frameset"), + (namespaces["html"], "h1"), + (namespaces["html"], "h2"), + (namespaces["html"], "h3"), + (namespaces["html"], "h4"), + (namespaces["html"], "h5"), + (namespaces["html"], "h6"), + (namespaces["html"], "head"), + (namespaces["html"], "header"), + (namespaces["html"], "hr"), + (namespaces["html"], "html"), + (namespaces["html"], "iframe"), + # Note that image is commented out in the spec as "this isn't an + # element that can end up on the stack, so it doesn't matter," + (namespaces["html"], "image"), + (namespaces["html"], "img"), + (namespaces["html"], "input"), + (namespaces["html"], "isindex"), + (namespaces["html"], "li"), + (namespaces["html"], "link"), + (namespaces["html"], "listing"), + (namespaces["html"], "marquee"), + (namespaces["html"], "menu"), + (namespaces["html"], "meta"), + (namespaces["html"], "nav"), + (namespaces["html"], "noembed"), + (namespaces["html"], "noframes"), + (namespaces["html"], "noscript"), + (namespaces["html"], "object"), + (namespaces["html"], "ol"), + (namespaces["html"], "p"), + (namespaces["html"], "param"), + (namespaces["html"], "plaintext"), + (namespaces["html"], "pre"), + (namespaces["html"], "script"), + (namespaces["html"], "section"), + (namespaces["html"], "select"), + (namespaces["html"], "style"), + (namespaces["html"], "table"), + (namespaces["html"], "tbody"), + (namespaces["html"], "td"), + (namespaces["html"], "textarea"), + (namespaces["html"], "tfoot"), + (namespaces["html"], "th"), + (namespaces["html"], "thead"), + (namespaces["html"], "title"), + (namespaces["html"], "tr"), + (namespaces["html"], "ul"), + (namespaces["html"], "wbr"), + (namespaces["html"], "xmp"), + (namespaces["svg"], "foreignObject") +]) + +htmlIntegrationPointElements = frozenset([ + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title") +]) + +mathmlTextIntegrationPointElements = frozenset([ + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext") +]) + +adjustSVGAttributes = { + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" +} + +adjustMathMLAttributes = {"definitionurl": "definitionURL"} + +adjustForeignAttributes = { + "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), + "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), + "xlink:href": ("xlink", "href", namespaces["xlink"]), + "xlink:role": ("xlink", "role", namespaces["xlink"]), + "xlink:show": ("xlink", "show", namespaces["xlink"]), + "xlink:title": ("xlink", "title", namespaces["xlink"]), + "xlink:type": ("xlink", "type", namespaces["xlink"]), + "xml:base": ("xml", "base", namespaces["xml"]), + "xml:lang": ("xml", "lang", namespaces["xml"]), + "xml:space": ("xml", "space", namespaces["xml"]), + "xmlns": (None, "xmlns", namespaces["xmlns"]), + "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) +} + +unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in + adjustForeignAttributes.items()]) + +spaceCharacters = frozenset([ + "\t", + "\n", + "\u000C", + " ", + "\r" +]) + +tableInsertModeElements = frozenset([ + "table", + "tbody", + "tfoot", + "thead", + "tr" +]) + +asciiLowercase = frozenset(string.ascii_lowercase) +asciiUppercase = frozenset(string.ascii_uppercase) +asciiLetters = frozenset(string.ascii_letters) +digits = frozenset(string.digits) +hexDigits = frozenset(string.hexdigits) + +asciiUpper2Lower = dict([(ord(c), ord(c.lower())) + for c in string.ascii_uppercase]) + +# Heading elements need to be ordered +headingElements = ( + "h1", + "h2", + "h3", + "h4", + "h5", + "h6" +) + +voidElements = frozenset([ + "base", + "command", + "event-source", + "link", + "meta", + "hr", + "br", + "img", + "embed", + "param", + "area", + "col", + "input", + "source", + "track" +]) + +cdataElements = frozenset(['title', 'textarea']) + +rcdataElements = frozenset([ + 'style', + 'script', + 'xmp', + 'iframe', + 'noembed', + 'noframes', + 'noscript' +]) + +booleanAttributes = { + "": frozenset(["irrelevant", "itemscope"]), + "style": frozenset(["scoped"]), + "img": frozenset(["ismap"]), + "audio": frozenset(["autoplay", "controls"]), + "video": frozenset(["autoplay", "controls"]), + "script": frozenset(["defer", "async"]), + "details": frozenset(["open"]), + "datagrid": frozenset(["multiple", "disabled"]), + "command": frozenset(["hidden", "disabled", "checked", "default"]), + "hr": frozenset(["noshade"]), + "menu": frozenset(["autosubmit"]), + "fieldset": frozenset(["disabled", "readonly"]), + "option": frozenset(["disabled", "readonly", "selected"]), + "optgroup": frozenset(["disabled", "readonly"]), + "button": frozenset(["disabled", "autofocus"]), + "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), + "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), + "output": frozenset(["disabled", "readonly"]), + "iframe": frozenset(["seamless"]), +} + +# entitiesWindows1252 has to be _ordered_ and needs to have an index. It +# therefore can't be a frozenset. +entitiesWindows1252 = ( + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +) + +xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) + +entities = { + "AElig": "\xc6", + "AElig;": "\xc6", + "AMP": "&", + "AMP;": "&", + "Aacute": "\xc1", + "Aacute;": "\xc1", + "Abreve;": "\u0102", + "Acirc": "\xc2", + "Acirc;": "\xc2", + "Acy;": "\u0410", + "Afr;": "\U0001d504", + "Agrave": "\xc0", + "Agrave;": "\xc0", + "Alpha;": "\u0391", + "Amacr;": "\u0100", + "And;": "\u2a53", + "Aogon;": "\u0104", + "Aopf;": "\U0001d538", + "ApplyFunction;": "\u2061", + "Aring": "\xc5", + "Aring;": "\xc5", + "Ascr;": "\U0001d49c", + "Assign;": "\u2254", + "Atilde": "\xc3", + "Atilde;": "\xc3", + "Auml": "\xc4", + "Auml;": "\xc4", + "Backslash;": "\u2216", + "Barv;": "\u2ae7", + "Barwed;": "\u2306", + "Bcy;": "\u0411", + "Because;": "\u2235", + "Bernoullis;": "\u212c", + "Beta;": "\u0392", + "Bfr;": "\U0001d505", + "Bopf;": "\U0001d539", + "Breve;": "\u02d8", + "Bscr;": "\u212c", + "Bumpeq;": "\u224e", + "CHcy;": "\u0427", + "COPY": "\xa9", + "COPY;": "\xa9", + "Cacute;": "\u0106", + "Cap;": "\u22d2", + "CapitalDifferentialD;": "\u2145", + "Cayleys;": "\u212d", + "Ccaron;": "\u010c", + "Ccedil": "\xc7", + "Ccedil;": "\xc7", + "Ccirc;": "\u0108", + "Cconint;": "\u2230", + "Cdot;": "\u010a", + "Cedilla;": "\xb8", + "CenterDot;": "\xb7", + "Cfr;": "\u212d", + "Chi;": "\u03a7", + "CircleDot;": "\u2299", + "CircleMinus;": "\u2296", + "CirclePlus;": "\u2295", + "CircleTimes;": "\u2297", + "ClockwiseContourIntegral;": "\u2232", + "CloseCurlyDoubleQuote;": "\u201d", + "CloseCurlyQuote;": "\u2019", + "Colon;": "\u2237", + "Colone;": "\u2a74", + "Congruent;": "\u2261", + "Conint;": "\u222f", + "ContourIntegral;": "\u222e", + "Copf;": "\u2102", + "Coproduct;": "\u2210", + "CounterClockwiseContourIntegral;": "\u2233", + "Cross;": "\u2a2f", + "Cscr;": "\U0001d49e", + "Cup;": "\u22d3", + "CupCap;": "\u224d", + "DD;": "\u2145", + "DDotrahd;": "\u2911", + "DJcy;": "\u0402", + "DScy;": "\u0405", + "DZcy;": "\u040f", + "Dagger;": "\u2021", + "Darr;": "\u21a1", + "Dashv;": "\u2ae4", + "Dcaron;": "\u010e", + "Dcy;": "\u0414", + "Del;": "\u2207", + "Delta;": "\u0394", + "Dfr;": "\U0001d507", + "DiacriticalAcute;": "\xb4", + "DiacriticalDot;": "\u02d9", + "DiacriticalDoubleAcute;": "\u02dd", + "DiacriticalGrave;": "`", + "DiacriticalTilde;": "\u02dc", + "Diamond;": "\u22c4", + "DifferentialD;": "\u2146", + "Dopf;": "\U0001d53b", + "Dot;": "\xa8", + "DotDot;": "\u20dc", + "DotEqual;": "\u2250", + "DoubleContourIntegral;": "\u222f", + "DoubleDot;": "\xa8", + "DoubleDownArrow;": "\u21d3", + "DoubleLeftArrow;": "\u21d0", + "DoubleLeftRightArrow;": "\u21d4", + "DoubleLeftTee;": "\u2ae4", + "DoubleLongLeftArrow;": "\u27f8", + "DoubleLongLeftRightArrow;": "\u27fa", + "DoubleLongRightArrow;": "\u27f9", + "DoubleRightArrow;": "\u21d2", + "DoubleRightTee;": "\u22a8", + "DoubleUpArrow;": "\u21d1", + "DoubleUpDownArrow;": "\u21d5", + "DoubleVerticalBar;": "\u2225", + "DownArrow;": "\u2193", + "DownArrowBar;": "\u2913", + "DownArrowUpArrow;": "\u21f5", + "DownBreve;": "\u0311", + "DownLeftRightVector;": "\u2950", + "DownLeftTeeVector;": "\u295e", + "DownLeftVector;": "\u21bd", + "DownLeftVectorBar;": "\u2956", + "DownRightTeeVector;": "\u295f", + "DownRightVector;": "\u21c1", + "DownRightVectorBar;": "\u2957", + "DownTee;": "\u22a4", + "DownTeeArrow;": "\u21a7", + "Downarrow;": "\u21d3", + "Dscr;": "\U0001d49f", + "Dstrok;": "\u0110", + "ENG;": "\u014a", + "ETH": "\xd0", + "ETH;": "\xd0", + "Eacute": "\xc9", + "Eacute;": "\xc9", + "Ecaron;": "\u011a", + "Ecirc": "\xca", + "Ecirc;": "\xca", + "Ecy;": "\u042d", + "Edot;": "\u0116", + "Efr;": "\U0001d508", + "Egrave": "\xc8", + "Egrave;": "\xc8", + "Element;": "\u2208", + "Emacr;": "\u0112", + "EmptySmallSquare;": "\u25fb", + "EmptyVerySmallSquare;": "\u25ab", + "Eogon;": "\u0118", + "Eopf;": "\U0001d53c", + "Epsilon;": "\u0395", + "Equal;": "\u2a75", + "EqualTilde;": "\u2242", + "Equilibrium;": "\u21cc", + "Escr;": "\u2130", + "Esim;": "\u2a73", + "Eta;": "\u0397", + "Euml": "\xcb", + "Euml;": "\xcb", + "Exists;": "\u2203", + "ExponentialE;": "\u2147", + "Fcy;": "\u0424", + "Ffr;": "\U0001d509", + "FilledSmallSquare;": "\u25fc", + "FilledVerySmallSquare;": "\u25aa", + "Fopf;": "\U0001d53d", + "ForAll;": "\u2200", + "Fouriertrf;": "\u2131", + "Fscr;": "\u2131", + "GJcy;": "\u0403", + "GT": ">", + "GT;": ">", + "Gamma;": "\u0393", + "Gammad;": "\u03dc", + "Gbreve;": "\u011e", + "Gcedil;": "\u0122", + "Gcirc;": "\u011c", + "Gcy;": "\u0413", + "Gdot;": "\u0120", + "Gfr;": "\U0001d50a", + "Gg;": "\u22d9", + "Gopf;": "\U0001d53e", + "GreaterEqual;": "\u2265", + "GreaterEqualLess;": "\u22db", + "GreaterFullEqual;": "\u2267", + "GreaterGreater;": "\u2aa2", + "GreaterLess;": "\u2277", + "GreaterSlantEqual;": "\u2a7e", + "GreaterTilde;": "\u2273", + "Gscr;": "\U0001d4a2", + "Gt;": "\u226b", + "HARDcy;": "\u042a", + "Hacek;": "\u02c7", + "Hat;": "^", + "Hcirc;": "\u0124", + "Hfr;": "\u210c", + "HilbertSpace;": "\u210b", + "Hopf;": "\u210d", + "HorizontalLine;": "\u2500", + "Hscr;": "\u210b", + "Hstrok;": "\u0126", + "HumpDownHump;": "\u224e", + "HumpEqual;": "\u224f", + "IEcy;": "\u0415", + "IJlig;": "\u0132", + "IOcy;": "\u0401", + "Iacute": "\xcd", + "Iacute;": "\xcd", + "Icirc": "\xce", + "Icirc;": "\xce", + "Icy;": "\u0418", + "Idot;": "\u0130", + "Ifr;": "\u2111", + "Igrave": "\xcc", + "Igrave;": "\xcc", + "Im;": "\u2111", + "Imacr;": "\u012a", + "ImaginaryI;": "\u2148", + "Implies;": "\u21d2", + "Int;": "\u222c", + "Integral;": "\u222b", + "Intersection;": "\u22c2", + "InvisibleComma;": "\u2063", + "InvisibleTimes;": "\u2062", + "Iogon;": "\u012e", + "Iopf;": "\U0001d540", + "Iota;": "\u0399", + "Iscr;": "\u2110", + "Itilde;": "\u0128", + "Iukcy;": "\u0406", + "Iuml": "\xcf", + "Iuml;": "\xcf", + "Jcirc;": "\u0134", + "Jcy;": "\u0419", + "Jfr;": "\U0001d50d", + "Jopf;": "\U0001d541", + "Jscr;": "\U0001d4a5", + "Jsercy;": "\u0408", + "Jukcy;": "\u0404", + "KHcy;": "\u0425", + "KJcy;": "\u040c", + "Kappa;": "\u039a", + "Kcedil;": "\u0136", + "Kcy;": "\u041a", + "Kfr;": "\U0001d50e", + "Kopf;": "\U0001d542", + "Kscr;": "\U0001d4a6", + "LJcy;": "\u0409", + "LT": "<", + "LT;": "<", + "Lacute;": "\u0139", + "Lambda;": "\u039b", + "Lang;": "\u27ea", + "Laplacetrf;": "\u2112", + "Larr;": "\u219e", + "Lcaron;": "\u013d", + "Lcedil;": "\u013b", + "Lcy;": "\u041b", + "LeftAngleBracket;": "\u27e8", + "LeftArrow;": "\u2190", + "LeftArrowBar;": "\u21e4", + "LeftArrowRightArrow;": "\u21c6", + "LeftCeiling;": "\u2308", + "LeftDoubleBracket;": "\u27e6", + "LeftDownTeeVector;": "\u2961", + "LeftDownVector;": "\u21c3", + "LeftDownVectorBar;": "\u2959", + "LeftFloor;": "\u230a", + "LeftRightArrow;": "\u2194", + "LeftRightVector;": "\u294e", + "LeftTee;": "\u22a3", + "LeftTeeArrow;": "\u21a4", + "LeftTeeVector;": "\u295a", + "LeftTriangle;": "\u22b2", + "LeftTriangleBar;": "\u29cf", + "LeftTriangleEqual;": "\u22b4", + "LeftUpDownVector;": "\u2951", + "LeftUpTeeVector;": "\u2960", + "LeftUpVector;": "\u21bf", + "LeftUpVectorBar;": "\u2958", + "LeftVector;": "\u21bc", + "LeftVectorBar;": "\u2952", + "Leftarrow;": "\u21d0", + "Leftrightarrow;": "\u21d4", + "LessEqualGreater;": "\u22da", + "LessFullEqual;": "\u2266", + "LessGreater;": "\u2276", + "LessLess;": "\u2aa1", + "LessSlantEqual;": "\u2a7d", + "LessTilde;": "\u2272", + "Lfr;": "\U0001d50f", + "Ll;": "\u22d8", + "Lleftarrow;": "\u21da", + "Lmidot;": "\u013f", + "LongLeftArrow;": "\u27f5", + "LongLeftRightArrow;": "\u27f7", + "LongRightArrow;": "\u27f6", + "Longleftarrow;": "\u27f8", + "Longleftrightarrow;": "\u27fa", + "Longrightarrow;": "\u27f9", + "Lopf;": "\U0001d543", + "LowerLeftArrow;": "\u2199", + "LowerRightArrow;": "\u2198", + "Lscr;": "\u2112", + "Lsh;": "\u21b0", + "Lstrok;": "\u0141", + "Lt;": "\u226a", + "Map;": "\u2905", + "Mcy;": "\u041c", + "MediumSpace;": "\u205f", + "Mellintrf;": "\u2133", + "Mfr;": "\U0001d510", + "MinusPlus;": "\u2213", + "Mopf;": "\U0001d544", + "Mscr;": "\u2133", + "Mu;": "\u039c", + "NJcy;": "\u040a", + "Nacute;": "\u0143", + "Ncaron;": "\u0147", + "Ncedil;": "\u0145", + "Ncy;": "\u041d", + "NegativeMediumSpace;": "\u200b", + "NegativeThickSpace;": "\u200b", + "NegativeThinSpace;": "\u200b", + "NegativeVeryThinSpace;": "\u200b", + "NestedGreaterGreater;": "\u226b", + "NestedLessLess;": "\u226a", + "NewLine;": "\n", + "Nfr;": "\U0001d511", + "NoBreak;": "\u2060", + "NonBreakingSpace;": "\xa0", + "Nopf;": "\u2115", + "Not;": "\u2aec", + "NotCongruent;": "\u2262", + "NotCupCap;": "\u226d", + "NotDoubleVerticalBar;": "\u2226", + "NotElement;": "\u2209", + "NotEqual;": "\u2260", + "NotEqualTilde;": "\u2242\u0338", + "NotExists;": "\u2204", + "NotGreater;": "\u226f", + "NotGreaterEqual;": "\u2271", + "NotGreaterFullEqual;": "\u2267\u0338", + "NotGreaterGreater;": "\u226b\u0338", + "NotGreaterLess;": "\u2279", + "NotGreaterSlantEqual;": "\u2a7e\u0338", + "NotGreaterTilde;": "\u2275", + "NotHumpDownHump;": "\u224e\u0338", + "NotHumpEqual;": "\u224f\u0338", + "NotLeftTriangle;": "\u22ea", + "NotLeftTriangleBar;": "\u29cf\u0338", + "NotLeftTriangleEqual;": "\u22ec", + "NotLess;": "\u226e", + "NotLessEqual;": "\u2270", + "NotLessGreater;": "\u2278", + "NotLessLess;": "\u226a\u0338", + "NotLessSlantEqual;": "\u2a7d\u0338", + "NotLessTilde;": "\u2274", + "NotNestedGreaterGreater;": "\u2aa2\u0338", + "NotNestedLessLess;": "\u2aa1\u0338", + "NotPrecedes;": "\u2280", + "NotPrecedesEqual;": "\u2aaf\u0338", + "NotPrecedesSlantEqual;": "\u22e0", + "NotReverseElement;": "\u220c", + "NotRightTriangle;": "\u22eb", + "NotRightTriangleBar;": "\u29d0\u0338", + "NotRightTriangleEqual;": "\u22ed", + "NotSquareSubset;": "\u228f\u0338", + "NotSquareSubsetEqual;": "\u22e2", + "NotSquareSuperset;": "\u2290\u0338", + "NotSquareSupersetEqual;": "\u22e3", + "NotSubset;": "\u2282\u20d2", + "NotSubsetEqual;": "\u2288", + "NotSucceeds;": "\u2281", + "NotSucceedsEqual;": "\u2ab0\u0338", + "NotSucceedsSlantEqual;": "\u22e1", + "NotSucceedsTilde;": "\u227f\u0338", + "NotSuperset;": "\u2283\u20d2", + "NotSupersetEqual;": "\u2289", + "NotTilde;": "\u2241", + "NotTildeEqual;": "\u2244", + "NotTildeFullEqual;": "\u2247", + "NotTildeTilde;": "\u2249", + "NotVerticalBar;": "\u2224", + "Nscr;": "\U0001d4a9", + "Ntilde": "\xd1", + "Ntilde;": "\xd1", + "Nu;": "\u039d", + "OElig;": "\u0152", + "Oacute": "\xd3", + "Oacute;": "\xd3", + "Ocirc": "\xd4", + "Ocirc;": "\xd4", + "Ocy;": "\u041e", + "Odblac;": "\u0150", + "Ofr;": "\U0001d512", + "Ograve": "\xd2", + "Ograve;": "\xd2", + "Omacr;": "\u014c", + "Omega;": "\u03a9", + "Omicron;": "\u039f", + "Oopf;": "\U0001d546", + "OpenCurlyDoubleQuote;": "\u201c", + "OpenCurlyQuote;": "\u2018", + "Or;": "\u2a54", + "Oscr;": "\U0001d4aa", + "Oslash": "\xd8", + "Oslash;": "\xd8", + "Otilde": "\xd5", + "Otilde;": "\xd5", + "Otimes;": "\u2a37", + "Ouml": "\xd6", + "Ouml;": "\xd6", + "OverBar;": "\u203e", + "OverBrace;": "\u23de", + "OverBracket;": "\u23b4", + "OverParenthesis;": "\u23dc", + "PartialD;": "\u2202", + "Pcy;": "\u041f", + "Pfr;": "\U0001d513", + "Phi;": "\u03a6", + "Pi;": "\u03a0", + "PlusMinus;": "\xb1", + "Poincareplane;": "\u210c", + "Popf;": "\u2119", + "Pr;": "\u2abb", + "Precedes;": "\u227a", + "PrecedesEqual;": "\u2aaf", + "PrecedesSlantEqual;": "\u227c", + "PrecedesTilde;": "\u227e", + "Prime;": "\u2033", + "Product;": "\u220f", + "Proportion;": "\u2237", + "Proportional;": "\u221d", + "Pscr;": "\U0001d4ab", + "Psi;": "\u03a8", + "QUOT": "\"", + "QUOT;": "\"", + "Qfr;": "\U0001d514", + "Qopf;": "\u211a", + "Qscr;": "\U0001d4ac", + "RBarr;": "\u2910", + "REG": "\xae", + "REG;": "\xae", + "Racute;": "\u0154", + "Rang;": "\u27eb", + "Rarr;": "\u21a0", + "Rarrtl;": "\u2916", + "Rcaron;": "\u0158", + "Rcedil;": "\u0156", + "Rcy;": "\u0420", + "Re;": "\u211c", + "ReverseElement;": "\u220b", + "ReverseEquilibrium;": "\u21cb", + "ReverseUpEquilibrium;": "\u296f", + "Rfr;": "\u211c", + "Rho;": "\u03a1", + "RightAngleBracket;": "\u27e9", + "RightArrow;": "\u2192", + "RightArrowBar;": "\u21e5", + "RightArrowLeftArrow;": "\u21c4", + "RightCeiling;": "\u2309", + "RightDoubleBracket;": "\u27e7", + "RightDownTeeVector;": "\u295d", + "RightDownVector;": "\u21c2", + "RightDownVectorBar;": "\u2955", + "RightFloor;": "\u230b", + "RightTee;": "\u22a2", + "RightTeeArrow;": "\u21a6", + "RightTeeVector;": "\u295b", + "RightTriangle;": "\u22b3", + "RightTriangleBar;": "\u29d0", + "RightTriangleEqual;": "\u22b5", + "RightUpDownVector;": "\u294f", + "RightUpTeeVector;": "\u295c", + "RightUpVector;": "\u21be", + "RightUpVectorBar;": "\u2954", + "RightVector;": "\u21c0", + "RightVectorBar;": "\u2953", + "Rightarrow;": "\u21d2", + "Ropf;": "\u211d", + "RoundImplies;": "\u2970", + "Rrightarrow;": "\u21db", + "Rscr;": "\u211b", + "Rsh;": "\u21b1", + "RuleDelayed;": "\u29f4", + "SHCHcy;": "\u0429", + "SHcy;": "\u0428", + "SOFTcy;": "\u042c", + "Sacute;": "\u015a", + "Sc;": "\u2abc", + "Scaron;": "\u0160", + "Scedil;": "\u015e", + "Scirc;": "\u015c", + "Scy;": "\u0421", + "Sfr;": "\U0001d516", + "ShortDownArrow;": "\u2193", + "ShortLeftArrow;": "\u2190", + "ShortRightArrow;": "\u2192", + "ShortUpArrow;": "\u2191", + "Sigma;": "\u03a3", + "SmallCircle;": "\u2218", + "Sopf;": "\U0001d54a", + "Sqrt;": "\u221a", + "Square;": "\u25a1", + "SquareIntersection;": "\u2293", + "SquareSubset;": "\u228f", + "SquareSubsetEqual;": "\u2291", + "SquareSuperset;": "\u2290", + "SquareSupersetEqual;": "\u2292", + "SquareUnion;": "\u2294", + "Sscr;": "\U0001d4ae", + "Star;": "\u22c6", + "Sub;": "\u22d0", + "Subset;": "\u22d0", + "SubsetEqual;": "\u2286", + "Succeeds;": "\u227b", + "SucceedsEqual;": "\u2ab0", + "SucceedsSlantEqual;": "\u227d", + "SucceedsTilde;": "\u227f", + "SuchThat;": "\u220b", + "Sum;": "\u2211", + "Sup;": "\u22d1", + "Superset;": "\u2283", + "SupersetEqual;": "\u2287", + "Supset;": "\u22d1", + "THORN": "\xde", + "THORN;": "\xde", + "TRADE;": "\u2122", + "TSHcy;": "\u040b", + "TScy;": "\u0426", + "Tab;": "\t", + "Tau;": "\u03a4", + "Tcaron;": "\u0164", + "Tcedil;": "\u0162", + "Tcy;": "\u0422", + "Tfr;": "\U0001d517", + "Therefore;": "\u2234", + "Theta;": "\u0398", + "ThickSpace;": "\u205f\u200a", + "ThinSpace;": "\u2009", + "Tilde;": "\u223c", + "TildeEqual;": "\u2243", + "TildeFullEqual;": "\u2245", + "TildeTilde;": "\u2248", + "Topf;": "\U0001d54b", + "TripleDot;": "\u20db", + "Tscr;": "\U0001d4af", + "Tstrok;": "\u0166", + "Uacute": "\xda", + "Uacute;": "\xda", + "Uarr;": "\u219f", + "Uarrocir;": "\u2949", + "Ubrcy;": "\u040e", + "Ubreve;": "\u016c", + "Ucirc": "\xdb", + "Ucirc;": "\xdb", + "Ucy;": "\u0423", + "Udblac;": "\u0170", + "Ufr;": "\U0001d518", + "Ugrave": "\xd9", + "Ugrave;": "\xd9", + "Umacr;": "\u016a", + "UnderBar;": "_", + "UnderBrace;": "\u23df", + "UnderBracket;": "\u23b5", + "UnderParenthesis;": "\u23dd", + "Union;": "\u22c3", + "UnionPlus;": "\u228e", + "Uogon;": "\u0172", + "Uopf;": "\U0001d54c", + "UpArrow;": "\u2191", + "UpArrowBar;": "\u2912", + "UpArrowDownArrow;": "\u21c5", + "UpDownArrow;": "\u2195", + "UpEquilibrium;": "\u296e", + "UpTee;": "\u22a5", + "UpTeeArrow;": "\u21a5", + "Uparrow;": "\u21d1", + "Updownarrow;": "\u21d5", + "UpperLeftArrow;": "\u2196", + "UpperRightArrow;": "\u2197", + "Upsi;": "\u03d2", + "Upsilon;": "\u03a5", + "Uring;": "\u016e", + "Uscr;": "\U0001d4b0", + "Utilde;": "\u0168", + "Uuml": "\xdc", + "Uuml;": "\xdc", + "VDash;": "\u22ab", + "Vbar;": "\u2aeb", + "Vcy;": "\u0412", + "Vdash;": "\u22a9", + "Vdashl;": "\u2ae6", + "Vee;": "\u22c1", + "Verbar;": "\u2016", + "Vert;": "\u2016", + "VerticalBar;": "\u2223", + "VerticalLine;": "|", + "VerticalSeparator;": "\u2758", + "VerticalTilde;": "\u2240", + "VeryThinSpace;": "\u200a", + "Vfr;": "\U0001d519", + "Vopf;": "\U0001d54d", + "Vscr;": "\U0001d4b1", + "Vvdash;": "\u22aa", + "Wcirc;": "\u0174", + "Wedge;": "\u22c0", + "Wfr;": "\U0001d51a", + "Wopf;": "\U0001d54e", + "Wscr;": "\U0001d4b2", + "Xfr;": "\U0001d51b", + "Xi;": "\u039e", + "Xopf;": "\U0001d54f", + "Xscr;": "\U0001d4b3", + "YAcy;": "\u042f", + "YIcy;": "\u0407", + "YUcy;": "\u042e", + "Yacute": "\xdd", + "Yacute;": "\xdd", + "Ycirc;": "\u0176", + "Ycy;": "\u042b", + "Yfr;": "\U0001d51c", + "Yopf;": "\U0001d550", + "Yscr;": "\U0001d4b4", + "Yuml;": "\u0178", + "ZHcy;": "\u0416", + "Zacute;": "\u0179", + "Zcaron;": "\u017d", + "Zcy;": "\u0417", + "Zdot;": "\u017b", + "ZeroWidthSpace;": "\u200b", + "Zeta;": "\u0396", + "Zfr;": "\u2128", + "Zopf;": "\u2124", + "Zscr;": "\U0001d4b5", + "aacute": "\xe1", + "aacute;": "\xe1", + "abreve;": "\u0103", + "ac;": "\u223e", + "acE;": "\u223e\u0333", + "acd;": "\u223f", + "acirc": "\xe2", + "acirc;": "\xe2", + "acute": "\xb4", + "acute;": "\xb4", + "acy;": "\u0430", + "aelig": "\xe6", + "aelig;": "\xe6", + "af;": "\u2061", + "afr;": "\U0001d51e", + "agrave": "\xe0", + "agrave;": "\xe0", + "alefsym;": "\u2135", + "aleph;": "\u2135", + "alpha;": "\u03b1", + "amacr;": "\u0101", + "amalg;": "\u2a3f", + "amp": "&", + "amp;": "&", + "and;": "\u2227", + "andand;": "\u2a55", + "andd;": "\u2a5c", + "andslope;": "\u2a58", + "andv;": "\u2a5a", + "ang;": "\u2220", + "ange;": "\u29a4", + "angle;": "\u2220", + "angmsd;": "\u2221", + "angmsdaa;": "\u29a8", + "angmsdab;": "\u29a9", + "angmsdac;": "\u29aa", + "angmsdad;": "\u29ab", + "angmsdae;": "\u29ac", + "angmsdaf;": "\u29ad", + "angmsdag;": "\u29ae", + "angmsdah;": "\u29af", + "angrt;": "\u221f", + "angrtvb;": "\u22be", + "angrtvbd;": "\u299d", + "angsph;": "\u2222", + "angst;": "\xc5", + "angzarr;": "\u237c", + "aogon;": "\u0105", + "aopf;": "\U0001d552", + "ap;": "\u2248", + "apE;": "\u2a70", + "apacir;": "\u2a6f", + "ape;": "\u224a", + "apid;": "\u224b", + "apos;": "'", + "approx;": "\u2248", + "approxeq;": "\u224a", + "aring": "\xe5", + "aring;": "\xe5", + "ascr;": "\U0001d4b6", + "ast;": "*", + "asymp;": "\u2248", + "asympeq;": "\u224d", + "atilde": "\xe3", + "atilde;": "\xe3", + "auml": "\xe4", + "auml;": "\xe4", + "awconint;": "\u2233", + "awint;": "\u2a11", + "bNot;": "\u2aed", + "backcong;": "\u224c", + "backepsilon;": "\u03f6", + "backprime;": "\u2035", + "backsim;": "\u223d", + "backsimeq;": "\u22cd", + "barvee;": "\u22bd", + "barwed;": "\u2305", + "barwedge;": "\u2305", + "bbrk;": "\u23b5", + "bbrktbrk;": "\u23b6", + "bcong;": "\u224c", + "bcy;": "\u0431", + "bdquo;": "\u201e", + "becaus;": "\u2235", + "because;": "\u2235", + "bemptyv;": "\u29b0", + "bepsi;": "\u03f6", + "bernou;": "\u212c", + "beta;": "\u03b2", + "beth;": "\u2136", + "between;": "\u226c", + "bfr;": "\U0001d51f", + "bigcap;": "\u22c2", + "bigcirc;": "\u25ef", + "bigcup;": "\u22c3", + "bigodot;": "\u2a00", + "bigoplus;": "\u2a01", + "bigotimes;": "\u2a02", + "bigsqcup;": "\u2a06", + "bigstar;": "\u2605", + "bigtriangledown;": "\u25bd", + "bigtriangleup;": "\u25b3", + "biguplus;": "\u2a04", + "bigvee;": "\u22c1", + "bigwedge;": "\u22c0", + "bkarow;": "\u290d", + "blacklozenge;": "\u29eb", + "blacksquare;": "\u25aa", + "blacktriangle;": "\u25b4", + "blacktriangledown;": "\u25be", + "blacktriangleleft;": "\u25c2", + "blacktriangleright;": "\u25b8", + "blank;": "\u2423", + "blk12;": "\u2592", + "blk14;": "\u2591", + "blk34;": "\u2593", + "block;": "\u2588", + "bne;": "=\u20e5", + "bnequiv;": "\u2261\u20e5", + "bnot;": "\u2310", + "bopf;": "\U0001d553", + "bot;": "\u22a5", + "bottom;": "\u22a5", + "bowtie;": "\u22c8", + "boxDL;": "\u2557", + "boxDR;": "\u2554", + "boxDl;": "\u2556", + "boxDr;": "\u2553", + "boxH;": "\u2550", + "boxHD;": "\u2566", + "boxHU;": "\u2569", + "boxHd;": "\u2564", + "boxHu;": "\u2567", + "boxUL;": "\u255d", + "boxUR;": "\u255a", + "boxUl;": "\u255c", + "boxUr;": "\u2559", + "boxV;": "\u2551", + "boxVH;": "\u256c", + "boxVL;": "\u2563", + "boxVR;": "\u2560", + "boxVh;": "\u256b", + "boxVl;": "\u2562", + "boxVr;": "\u255f", + "boxbox;": "\u29c9", + "boxdL;": "\u2555", + "boxdR;": "\u2552", + "boxdl;": "\u2510", + "boxdr;": "\u250c", + "boxh;": "\u2500", + "boxhD;": "\u2565", + "boxhU;": "\u2568", + "boxhd;": "\u252c", + "boxhu;": "\u2534", + "boxminus;": "\u229f", + "boxplus;": "\u229e", + "boxtimes;": "\u22a0", + "boxuL;": "\u255b", + "boxuR;": "\u2558", + "boxul;": "\u2518", + "boxur;": "\u2514", + "boxv;": "\u2502", + "boxvH;": "\u256a", + "boxvL;": "\u2561", + "boxvR;": "\u255e", + "boxvh;": "\u253c", + "boxvl;": "\u2524", + "boxvr;": "\u251c", + "bprime;": "\u2035", + "breve;": "\u02d8", + "brvbar": "\xa6", + "brvbar;": "\xa6", + "bscr;": "\U0001d4b7", + "bsemi;": "\u204f", + "bsim;": "\u223d", + "bsime;": "\u22cd", + "bsol;": "\\", + "bsolb;": "\u29c5", + "bsolhsub;": "\u27c8", + "bull;": "\u2022", + "bullet;": "\u2022", + "bump;": "\u224e", + "bumpE;": "\u2aae", + "bumpe;": "\u224f", + "bumpeq;": "\u224f", + "cacute;": "\u0107", + "cap;": "\u2229", + "capand;": "\u2a44", + "capbrcup;": "\u2a49", + "capcap;": "\u2a4b", + "capcup;": "\u2a47", + "capdot;": "\u2a40", + "caps;": "\u2229\ufe00", + "caret;": "\u2041", + "caron;": "\u02c7", + "ccaps;": "\u2a4d", + "ccaron;": "\u010d", + "ccedil": "\xe7", + "ccedil;": "\xe7", + "ccirc;": "\u0109", + "ccups;": "\u2a4c", + "ccupssm;": "\u2a50", + "cdot;": "\u010b", + "cedil": "\xb8", + "cedil;": "\xb8", + "cemptyv;": "\u29b2", + "cent": "\xa2", + "cent;": "\xa2", + "centerdot;": "\xb7", + "cfr;": "\U0001d520", + "chcy;": "\u0447", + "check;": "\u2713", + "checkmark;": "\u2713", + "chi;": "\u03c7", + "cir;": "\u25cb", + "cirE;": "\u29c3", + "circ;": "\u02c6", + "circeq;": "\u2257", + "circlearrowleft;": "\u21ba", + "circlearrowright;": "\u21bb", + "circledR;": "\xae", + "circledS;": "\u24c8", + "circledast;": "\u229b", + "circledcirc;": "\u229a", + "circleddash;": "\u229d", + "cire;": "\u2257", + "cirfnint;": "\u2a10", + "cirmid;": "\u2aef", + "cirscir;": "\u29c2", + "clubs;": "\u2663", + "clubsuit;": "\u2663", + "colon;": ":", + "colone;": "\u2254", + "coloneq;": "\u2254", + "comma;": ",", + "commat;": "@", + "comp;": "\u2201", + "compfn;": "\u2218", + "complement;": "\u2201", + "complexes;": "\u2102", + "cong;": "\u2245", + "congdot;": "\u2a6d", + "conint;": "\u222e", + "copf;": "\U0001d554", + "coprod;": "\u2210", + "copy": "\xa9", + "copy;": "\xa9", + "copysr;": "\u2117", + "crarr;": "\u21b5", + "cross;": "\u2717", + "cscr;": "\U0001d4b8", + "csub;": "\u2acf", + "csube;": "\u2ad1", + "csup;": "\u2ad0", + "csupe;": "\u2ad2", + "ctdot;": "\u22ef", + "cudarrl;": "\u2938", + "cudarrr;": "\u2935", + "cuepr;": "\u22de", + "cuesc;": "\u22df", + "cularr;": "\u21b6", + "cularrp;": "\u293d", + "cup;": "\u222a", + "cupbrcap;": "\u2a48", + "cupcap;": "\u2a46", + "cupcup;": "\u2a4a", + "cupdot;": "\u228d", + "cupor;": "\u2a45", + "cups;": "\u222a\ufe00", + "curarr;": "\u21b7", + "curarrm;": "\u293c", + "curlyeqprec;": "\u22de", + "curlyeqsucc;": "\u22df", + "curlyvee;": "\u22ce", + "curlywedge;": "\u22cf", + "curren": "\xa4", + "curren;": "\xa4", + "curvearrowleft;": "\u21b6", + "curvearrowright;": "\u21b7", + "cuvee;": "\u22ce", + "cuwed;": "\u22cf", + "cwconint;": "\u2232", + "cwint;": "\u2231", + "cylcty;": "\u232d", + "dArr;": "\u21d3", + "dHar;": "\u2965", + "dagger;": "\u2020", + "daleth;": "\u2138", + "darr;": "\u2193", + "dash;": "\u2010", + "dashv;": "\u22a3", + "dbkarow;": "\u290f", + "dblac;": "\u02dd", + "dcaron;": "\u010f", + "dcy;": "\u0434", + "dd;": "\u2146", + "ddagger;": "\u2021", + "ddarr;": "\u21ca", + "ddotseq;": "\u2a77", + "deg": "\xb0", + "deg;": "\xb0", + "delta;": "\u03b4", + "demptyv;": "\u29b1", + "dfisht;": "\u297f", + "dfr;": "\U0001d521", + "dharl;": "\u21c3", + "dharr;": "\u21c2", + "diam;": "\u22c4", + "diamond;": "\u22c4", + "diamondsuit;": "\u2666", + "diams;": "\u2666", + "die;": "\xa8", + "digamma;": "\u03dd", + "disin;": "\u22f2", + "div;": "\xf7", + "divide": "\xf7", + "divide;": "\xf7", + "divideontimes;": "\u22c7", + "divonx;": "\u22c7", + "djcy;": "\u0452", + "dlcorn;": "\u231e", + "dlcrop;": "\u230d", + "dollar;": "$", + "dopf;": "\U0001d555", + "dot;": "\u02d9", + "doteq;": "\u2250", + "doteqdot;": "\u2251", + "dotminus;": "\u2238", + "dotplus;": "\u2214", + "dotsquare;": "\u22a1", + "doublebarwedge;": "\u2306", + "downarrow;": "\u2193", + "downdownarrows;": "\u21ca", + "downharpoonleft;": "\u21c3", + "downharpoonright;": "\u21c2", + "drbkarow;": "\u2910", + "drcorn;": "\u231f", + "drcrop;": "\u230c", + "dscr;": "\U0001d4b9", + "dscy;": "\u0455", + "dsol;": "\u29f6", + "dstrok;": "\u0111", + "dtdot;": "\u22f1", + "dtri;": "\u25bf", + "dtrif;": "\u25be", + "duarr;": "\u21f5", + "duhar;": "\u296f", + "dwangle;": "\u29a6", + "dzcy;": "\u045f", + "dzigrarr;": "\u27ff", + "eDDot;": "\u2a77", + "eDot;": "\u2251", + "eacute": "\xe9", + "eacute;": "\xe9", + "easter;": "\u2a6e", + "ecaron;": "\u011b", + "ecir;": "\u2256", + "ecirc": "\xea", + "ecirc;": "\xea", + "ecolon;": "\u2255", + "ecy;": "\u044d", + "edot;": "\u0117", + "ee;": "\u2147", + "efDot;": "\u2252", + "efr;": "\U0001d522", + "eg;": "\u2a9a", + "egrave": "\xe8", + "egrave;": "\xe8", + "egs;": "\u2a96", + "egsdot;": "\u2a98", + "el;": "\u2a99", + "elinters;": "\u23e7", + "ell;": "\u2113", + "els;": "\u2a95", + "elsdot;": "\u2a97", + "emacr;": "\u0113", + "empty;": "\u2205", + "emptyset;": "\u2205", + "emptyv;": "\u2205", + "emsp13;": "\u2004", + "emsp14;": "\u2005", + "emsp;": "\u2003", + "eng;": "\u014b", + "ensp;": "\u2002", + "eogon;": "\u0119", + "eopf;": "\U0001d556", + "epar;": "\u22d5", + "eparsl;": "\u29e3", + "eplus;": "\u2a71", + "epsi;": "\u03b5", + "epsilon;": "\u03b5", + "epsiv;": "\u03f5", + "eqcirc;": "\u2256", + "eqcolon;": "\u2255", + "eqsim;": "\u2242", + "eqslantgtr;": "\u2a96", + "eqslantless;": "\u2a95", + "equals;": "=", + "equest;": "\u225f", + "equiv;": "\u2261", + "equivDD;": "\u2a78", + "eqvparsl;": "\u29e5", + "erDot;": "\u2253", + "erarr;": "\u2971", + "escr;": "\u212f", + "esdot;": "\u2250", + "esim;": "\u2242", + "eta;": "\u03b7", + "eth": "\xf0", + "eth;": "\xf0", + "euml": "\xeb", + "euml;": "\xeb", + "euro;": "\u20ac", + "excl;": "!", + "exist;": "\u2203", + "expectation;": "\u2130", + "exponentiale;": "\u2147", + "fallingdotseq;": "\u2252", + "fcy;": "\u0444", + "female;": "\u2640", + "ffilig;": "\ufb03", + "fflig;": "\ufb00", + "ffllig;": "\ufb04", + "ffr;": "\U0001d523", + "filig;": "\ufb01", + "fjlig;": "fj", + "flat;": "\u266d", + "fllig;": "\ufb02", + "fltns;": "\u25b1", + "fnof;": "\u0192", + "fopf;": "\U0001d557", + "forall;": "\u2200", + "fork;": "\u22d4", + "forkv;": "\u2ad9", + "fpartint;": "\u2a0d", + "frac12": "\xbd", + "frac12;": "\xbd", + "frac13;": "\u2153", + "frac14": "\xbc", + "frac14;": "\xbc", + "frac15;": "\u2155", + "frac16;": "\u2159", + "frac18;": "\u215b", + "frac23;": "\u2154", + "frac25;": "\u2156", + "frac34": "\xbe", + "frac34;": "\xbe", + "frac35;": "\u2157", + "frac38;": "\u215c", + "frac45;": "\u2158", + "frac56;": "\u215a", + "frac58;": "\u215d", + "frac78;": "\u215e", + "frasl;": "\u2044", + "frown;": "\u2322", + "fscr;": "\U0001d4bb", + "gE;": "\u2267", + "gEl;": "\u2a8c", + "gacute;": "\u01f5", + "gamma;": "\u03b3", + "gammad;": "\u03dd", + "gap;": "\u2a86", + "gbreve;": "\u011f", + "gcirc;": "\u011d", + "gcy;": "\u0433", + "gdot;": "\u0121", + "ge;": "\u2265", + "gel;": "\u22db", + "geq;": "\u2265", + "geqq;": "\u2267", + "geqslant;": "\u2a7e", + "ges;": "\u2a7e", + "gescc;": "\u2aa9", + "gesdot;": "\u2a80", + "gesdoto;": "\u2a82", + "gesdotol;": "\u2a84", + "gesl;": "\u22db\ufe00", + "gesles;": "\u2a94", + "gfr;": "\U0001d524", + "gg;": "\u226b", + "ggg;": "\u22d9", + "gimel;": "\u2137", + "gjcy;": "\u0453", + "gl;": "\u2277", + "glE;": "\u2a92", + "gla;": "\u2aa5", + "glj;": "\u2aa4", + "gnE;": "\u2269", + "gnap;": "\u2a8a", + "gnapprox;": "\u2a8a", + "gne;": "\u2a88", + "gneq;": "\u2a88", + "gneqq;": "\u2269", + "gnsim;": "\u22e7", + "gopf;": "\U0001d558", + "grave;": "`", + "gscr;": "\u210a", + "gsim;": "\u2273", + "gsime;": "\u2a8e", + "gsiml;": "\u2a90", + "gt": ">", + "gt;": ">", + "gtcc;": "\u2aa7", + "gtcir;": "\u2a7a", + "gtdot;": "\u22d7", + "gtlPar;": "\u2995", + "gtquest;": "\u2a7c", + "gtrapprox;": "\u2a86", + "gtrarr;": "\u2978", + "gtrdot;": "\u22d7", + "gtreqless;": "\u22db", + "gtreqqless;": "\u2a8c", + "gtrless;": "\u2277", + "gtrsim;": "\u2273", + "gvertneqq;": "\u2269\ufe00", + "gvnE;": "\u2269\ufe00", + "hArr;": "\u21d4", + "hairsp;": "\u200a", + "half;": "\xbd", + "hamilt;": "\u210b", + "hardcy;": "\u044a", + "harr;": "\u2194", + "harrcir;": "\u2948", + "harrw;": "\u21ad", + "hbar;": "\u210f", + "hcirc;": "\u0125", + "hearts;": "\u2665", + "heartsuit;": "\u2665", + "hellip;": "\u2026", + "hercon;": "\u22b9", + "hfr;": "\U0001d525", + "hksearow;": "\u2925", + "hkswarow;": "\u2926", + "hoarr;": "\u21ff", + "homtht;": "\u223b", + "hookleftarrow;": "\u21a9", + "hookrightarrow;": "\u21aa", + "hopf;": "\U0001d559", + "horbar;": "\u2015", + "hscr;": "\U0001d4bd", + "hslash;": "\u210f", + "hstrok;": "\u0127", + "hybull;": "\u2043", + "hyphen;": "\u2010", + "iacute": "\xed", + "iacute;": "\xed", + "ic;": "\u2063", + "icirc": "\xee", + "icirc;": "\xee", + "icy;": "\u0438", + "iecy;": "\u0435", + "iexcl": "\xa1", + "iexcl;": "\xa1", + "iff;": "\u21d4", + "ifr;": "\U0001d526", + "igrave": "\xec", + "igrave;": "\xec", + "ii;": "\u2148", + "iiiint;": "\u2a0c", + "iiint;": "\u222d", + "iinfin;": "\u29dc", + "iiota;": "\u2129", + "ijlig;": "\u0133", + "imacr;": "\u012b", + "image;": "\u2111", + "imagline;": "\u2110", + "imagpart;": "\u2111", + "imath;": "\u0131", + "imof;": "\u22b7", + "imped;": "\u01b5", + "in;": "\u2208", + "incare;": "\u2105", + "infin;": "\u221e", + "infintie;": "\u29dd", + "inodot;": "\u0131", + "int;": "\u222b", + "intcal;": "\u22ba", + "integers;": "\u2124", + "intercal;": "\u22ba", + "intlarhk;": "\u2a17", + "intprod;": "\u2a3c", + "iocy;": "\u0451", + "iogon;": "\u012f", + "iopf;": "\U0001d55a", + "iota;": "\u03b9", + "iprod;": "\u2a3c", + "iquest": "\xbf", + "iquest;": "\xbf", + "iscr;": "\U0001d4be", + "isin;": "\u2208", + "isinE;": "\u22f9", + "isindot;": "\u22f5", + "isins;": "\u22f4", + "isinsv;": "\u22f3", + "isinv;": "\u2208", + "it;": "\u2062", + "itilde;": "\u0129", + "iukcy;": "\u0456", + "iuml": "\xef", + "iuml;": "\xef", + "jcirc;": "\u0135", + "jcy;": "\u0439", + "jfr;": "\U0001d527", + "jmath;": "\u0237", + "jopf;": "\U0001d55b", + "jscr;": "\U0001d4bf", + "jsercy;": "\u0458", + "jukcy;": "\u0454", + "kappa;": "\u03ba", + "kappav;": "\u03f0", + "kcedil;": "\u0137", + "kcy;": "\u043a", + "kfr;": "\U0001d528", + "kgreen;": "\u0138", + "khcy;": "\u0445", + "kjcy;": "\u045c", + "kopf;": "\U0001d55c", + "kscr;": "\U0001d4c0", + "lAarr;": "\u21da", + "lArr;": "\u21d0", + "lAtail;": "\u291b", + "lBarr;": "\u290e", + "lE;": "\u2266", + "lEg;": "\u2a8b", + "lHar;": "\u2962", + "lacute;": "\u013a", + "laemptyv;": "\u29b4", + "lagran;": "\u2112", + "lambda;": "\u03bb", + "lang;": "\u27e8", + "langd;": "\u2991", + "langle;": "\u27e8", + "lap;": "\u2a85", + "laquo": "\xab", + "laquo;": "\xab", + "larr;": "\u2190", + "larrb;": "\u21e4", + "larrbfs;": "\u291f", + "larrfs;": "\u291d", + "larrhk;": "\u21a9", + "larrlp;": "\u21ab", + "larrpl;": "\u2939", + "larrsim;": "\u2973", + "larrtl;": "\u21a2", + "lat;": "\u2aab", + "latail;": "\u2919", + "late;": "\u2aad", + "lates;": "\u2aad\ufe00", + "lbarr;": "\u290c", + "lbbrk;": "\u2772", + "lbrace;": "{", + "lbrack;": "[", + "lbrke;": "\u298b", + "lbrksld;": "\u298f", + "lbrkslu;": "\u298d", + "lcaron;": "\u013e", + "lcedil;": "\u013c", + "lceil;": "\u2308", + "lcub;": "{", + "lcy;": "\u043b", + "ldca;": "\u2936", + "ldquo;": "\u201c", + "ldquor;": "\u201e", + "ldrdhar;": "\u2967", + "ldrushar;": "\u294b", + "ldsh;": "\u21b2", + "le;": "\u2264", + "leftarrow;": "\u2190", + "leftarrowtail;": "\u21a2", + "leftharpoondown;": "\u21bd", + "leftharpoonup;": "\u21bc", + "leftleftarrows;": "\u21c7", + "leftrightarrow;": "\u2194", + "leftrightarrows;": "\u21c6", + "leftrightharpoons;": "\u21cb", + "leftrightsquigarrow;": "\u21ad", + "leftthreetimes;": "\u22cb", + "leg;": "\u22da", + "leq;": "\u2264", + "leqq;": "\u2266", + "leqslant;": "\u2a7d", + "les;": "\u2a7d", + "lescc;": "\u2aa8", + "lesdot;": "\u2a7f", + "lesdoto;": "\u2a81", + "lesdotor;": "\u2a83", + "lesg;": "\u22da\ufe00", + "lesges;": "\u2a93", + "lessapprox;": "\u2a85", + "lessdot;": "\u22d6", + "lesseqgtr;": "\u22da", + "lesseqqgtr;": "\u2a8b", + "lessgtr;": "\u2276", + "lesssim;": "\u2272", + "lfisht;": "\u297c", + "lfloor;": "\u230a", + "lfr;": "\U0001d529", + "lg;": "\u2276", + "lgE;": "\u2a91", + "lhard;": "\u21bd", + "lharu;": "\u21bc", + "lharul;": "\u296a", + "lhblk;": "\u2584", + "ljcy;": "\u0459", + "ll;": "\u226a", + "llarr;": "\u21c7", + "llcorner;": "\u231e", + "llhard;": "\u296b", + "lltri;": "\u25fa", + "lmidot;": "\u0140", + "lmoust;": "\u23b0", + "lmoustache;": "\u23b0", + "lnE;": "\u2268", + "lnap;": "\u2a89", + "lnapprox;": "\u2a89", + "lne;": "\u2a87", + "lneq;": "\u2a87", + "lneqq;": "\u2268", + "lnsim;": "\u22e6", + "loang;": "\u27ec", + "loarr;": "\u21fd", + "lobrk;": "\u27e6", + "longleftarrow;": "\u27f5", + "longleftrightarrow;": "\u27f7", + "longmapsto;": "\u27fc", + "longrightarrow;": "\u27f6", + "looparrowleft;": "\u21ab", + "looparrowright;": "\u21ac", + "lopar;": "\u2985", + "lopf;": "\U0001d55d", + "loplus;": "\u2a2d", + "lotimes;": "\u2a34", + "lowast;": "\u2217", + "lowbar;": "_", + "loz;": "\u25ca", + "lozenge;": "\u25ca", + "lozf;": "\u29eb", + "lpar;": "(", + "lparlt;": "\u2993", + "lrarr;": "\u21c6", + "lrcorner;": "\u231f", + "lrhar;": "\u21cb", + "lrhard;": "\u296d", + "lrm;": "\u200e", + "lrtri;": "\u22bf", + "lsaquo;": "\u2039", + "lscr;": "\U0001d4c1", + "lsh;": "\u21b0", + "lsim;": "\u2272", + "lsime;": "\u2a8d", + "lsimg;": "\u2a8f", + "lsqb;": "[", + "lsquo;": "\u2018", + "lsquor;": "\u201a", + "lstrok;": "\u0142", + "lt": "<", + "lt;": "<", + "ltcc;": "\u2aa6", + "ltcir;": "\u2a79", + "ltdot;": "\u22d6", + "lthree;": "\u22cb", + "ltimes;": "\u22c9", + "ltlarr;": "\u2976", + "ltquest;": "\u2a7b", + "ltrPar;": "\u2996", + "ltri;": "\u25c3", + "ltrie;": "\u22b4", + "ltrif;": "\u25c2", + "lurdshar;": "\u294a", + "luruhar;": "\u2966", + "lvertneqq;": "\u2268\ufe00", + "lvnE;": "\u2268\ufe00", + "mDDot;": "\u223a", + "macr": "\xaf", + "macr;": "\xaf", + "male;": "\u2642", + "malt;": "\u2720", + "maltese;": "\u2720", + "map;": "\u21a6", + "mapsto;": "\u21a6", + "mapstodown;": "\u21a7", + "mapstoleft;": "\u21a4", + "mapstoup;": "\u21a5", + "marker;": "\u25ae", + "mcomma;": "\u2a29", + "mcy;": "\u043c", + "mdash;": "\u2014", + "measuredangle;": "\u2221", + "mfr;": "\U0001d52a", + "mho;": "\u2127", + "micro": "\xb5", + "micro;": "\xb5", + "mid;": "\u2223", + "midast;": "*", + "midcir;": "\u2af0", + "middot": "\xb7", + "middot;": "\xb7", + "minus;": "\u2212", + "minusb;": "\u229f", + "minusd;": "\u2238", + "minusdu;": "\u2a2a", + "mlcp;": "\u2adb", + "mldr;": "\u2026", + "mnplus;": "\u2213", + "models;": "\u22a7", + "mopf;": "\U0001d55e", + "mp;": "\u2213", + "mscr;": "\U0001d4c2", + "mstpos;": "\u223e", + "mu;": "\u03bc", + "multimap;": "\u22b8", + "mumap;": "\u22b8", + "nGg;": "\u22d9\u0338", + "nGt;": "\u226b\u20d2", + "nGtv;": "\u226b\u0338", + "nLeftarrow;": "\u21cd", + "nLeftrightarrow;": "\u21ce", + "nLl;": "\u22d8\u0338", + "nLt;": "\u226a\u20d2", + "nLtv;": "\u226a\u0338", + "nRightarrow;": "\u21cf", + "nVDash;": "\u22af", + "nVdash;": "\u22ae", + "nabla;": "\u2207", + "nacute;": "\u0144", + "nang;": "\u2220\u20d2", + "nap;": "\u2249", + "napE;": "\u2a70\u0338", + "napid;": "\u224b\u0338", + "napos;": "\u0149", + "napprox;": "\u2249", + "natur;": "\u266e", + "natural;": "\u266e", + "naturals;": "\u2115", + "nbsp": "\xa0", + "nbsp;": "\xa0", + "nbump;": "\u224e\u0338", + "nbumpe;": "\u224f\u0338", + "ncap;": "\u2a43", + "ncaron;": "\u0148", + "ncedil;": "\u0146", + "ncong;": "\u2247", + "ncongdot;": "\u2a6d\u0338", + "ncup;": "\u2a42", + "ncy;": "\u043d", + "ndash;": "\u2013", + "ne;": "\u2260", + "neArr;": "\u21d7", + "nearhk;": "\u2924", + "nearr;": "\u2197", + "nearrow;": "\u2197", + "nedot;": "\u2250\u0338", + "nequiv;": "\u2262", + "nesear;": "\u2928", + "nesim;": "\u2242\u0338", + "nexist;": "\u2204", + "nexists;": "\u2204", + "nfr;": "\U0001d52b", + "ngE;": "\u2267\u0338", + "nge;": "\u2271", + "ngeq;": "\u2271", + "ngeqq;": "\u2267\u0338", + "ngeqslant;": "\u2a7e\u0338", + "nges;": "\u2a7e\u0338", + "ngsim;": "\u2275", + "ngt;": "\u226f", + "ngtr;": "\u226f", + "nhArr;": "\u21ce", + "nharr;": "\u21ae", + "nhpar;": "\u2af2", + "ni;": "\u220b", + "nis;": "\u22fc", + "nisd;": "\u22fa", + "niv;": "\u220b", + "njcy;": "\u045a", + "nlArr;": "\u21cd", + "nlE;": "\u2266\u0338", + "nlarr;": "\u219a", + "nldr;": "\u2025", + "nle;": "\u2270", + "nleftarrow;": "\u219a", + "nleftrightarrow;": "\u21ae", + "nleq;": "\u2270", + "nleqq;": "\u2266\u0338", + "nleqslant;": "\u2a7d\u0338", + "nles;": "\u2a7d\u0338", + "nless;": "\u226e", + "nlsim;": "\u2274", + "nlt;": "\u226e", + "nltri;": "\u22ea", + "nltrie;": "\u22ec", + "nmid;": "\u2224", + "nopf;": "\U0001d55f", + "not": "\xac", + "not;": "\xac", + "notin;": "\u2209", + "notinE;": "\u22f9\u0338", + "notindot;": "\u22f5\u0338", + "notinva;": "\u2209", + "notinvb;": "\u22f7", + "notinvc;": "\u22f6", + "notni;": "\u220c", + "notniva;": "\u220c", + "notnivb;": "\u22fe", + "notnivc;": "\u22fd", + "npar;": "\u2226", + "nparallel;": "\u2226", + "nparsl;": "\u2afd\u20e5", + "npart;": "\u2202\u0338", + "npolint;": "\u2a14", + "npr;": "\u2280", + "nprcue;": "\u22e0", + "npre;": "\u2aaf\u0338", + "nprec;": "\u2280", + "npreceq;": "\u2aaf\u0338", + "nrArr;": "\u21cf", + "nrarr;": "\u219b", + "nrarrc;": "\u2933\u0338", + "nrarrw;": "\u219d\u0338", + "nrightarrow;": "\u219b", + "nrtri;": "\u22eb", + "nrtrie;": "\u22ed", + "nsc;": "\u2281", + "nsccue;": "\u22e1", + "nsce;": "\u2ab0\u0338", + "nscr;": "\U0001d4c3", + "nshortmid;": "\u2224", + "nshortparallel;": "\u2226", + "nsim;": "\u2241", + "nsime;": "\u2244", + "nsimeq;": "\u2244", + "nsmid;": "\u2224", + "nspar;": "\u2226", + "nsqsube;": "\u22e2", + "nsqsupe;": "\u22e3", + "nsub;": "\u2284", + "nsubE;": "\u2ac5\u0338", + "nsube;": "\u2288", + "nsubset;": "\u2282\u20d2", + "nsubseteq;": "\u2288", + "nsubseteqq;": "\u2ac5\u0338", + "nsucc;": "\u2281", + "nsucceq;": "\u2ab0\u0338", + "nsup;": "\u2285", + "nsupE;": "\u2ac6\u0338", + "nsupe;": "\u2289", + "nsupset;": "\u2283\u20d2", + "nsupseteq;": "\u2289", + "nsupseteqq;": "\u2ac6\u0338", + "ntgl;": "\u2279", + "ntilde": "\xf1", + "ntilde;": "\xf1", + "ntlg;": "\u2278", + "ntriangleleft;": "\u22ea", + "ntrianglelefteq;": "\u22ec", + "ntriangleright;": "\u22eb", + "ntrianglerighteq;": "\u22ed", + "nu;": "\u03bd", + "num;": "#", + "numero;": "\u2116", + "numsp;": "\u2007", + "nvDash;": "\u22ad", + "nvHarr;": "\u2904", + "nvap;": "\u224d\u20d2", + "nvdash;": "\u22ac", + "nvge;": "\u2265\u20d2", + "nvgt;": ">\u20d2", + "nvinfin;": "\u29de", + "nvlArr;": "\u2902", + "nvle;": "\u2264\u20d2", + "nvlt;": "<\u20d2", + "nvltrie;": "\u22b4\u20d2", + "nvrArr;": "\u2903", + "nvrtrie;": "\u22b5\u20d2", + "nvsim;": "\u223c\u20d2", + "nwArr;": "\u21d6", + "nwarhk;": "\u2923", + "nwarr;": "\u2196", + "nwarrow;": "\u2196", + "nwnear;": "\u2927", + "oS;": "\u24c8", + "oacute": "\xf3", + "oacute;": "\xf3", + "oast;": "\u229b", + "ocir;": "\u229a", + "ocirc": "\xf4", + "ocirc;": "\xf4", + "ocy;": "\u043e", + "odash;": "\u229d", + "odblac;": "\u0151", + "odiv;": "\u2a38", + "odot;": "\u2299", + "odsold;": "\u29bc", + "oelig;": "\u0153", + "ofcir;": "\u29bf", + "ofr;": "\U0001d52c", + "ogon;": "\u02db", + "ograve": "\xf2", + "ograve;": "\xf2", + "ogt;": "\u29c1", + "ohbar;": "\u29b5", + "ohm;": "\u03a9", + "oint;": "\u222e", + "olarr;": "\u21ba", + "olcir;": "\u29be", + "olcross;": "\u29bb", + "oline;": "\u203e", + "olt;": "\u29c0", + "omacr;": "\u014d", + "omega;": "\u03c9", + "omicron;": "\u03bf", + "omid;": "\u29b6", + "ominus;": "\u2296", + "oopf;": "\U0001d560", + "opar;": "\u29b7", + "operp;": "\u29b9", + "oplus;": "\u2295", + "or;": "\u2228", + "orarr;": "\u21bb", + "ord;": "\u2a5d", + "order;": "\u2134", + "orderof;": "\u2134", + "ordf": "\xaa", + "ordf;": "\xaa", + "ordm": "\xba", + "ordm;": "\xba", + "origof;": "\u22b6", + "oror;": "\u2a56", + "orslope;": "\u2a57", + "orv;": "\u2a5b", + "oscr;": "\u2134", + "oslash": "\xf8", + "oslash;": "\xf8", + "osol;": "\u2298", + "otilde": "\xf5", + "otilde;": "\xf5", + "otimes;": "\u2297", + "otimesas;": "\u2a36", + "ouml": "\xf6", + "ouml;": "\xf6", + "ovbar;": "\u233d", + "par;": "\u2225", + "para": "\xb6", + "para;": "\xb6", + "parallel;": "\u2225", + "parsim;": "\u2af3", + "parsl;": "\u2afd", + "part;": "\u2202", + "pcy;": "\u043f", + "percnt;": "%", + "period;": ".", + "permil;": "\u2030", + "perp;": "\u22a5", + "pertenk;": "\u2031", + "pfr;": "\U0001d52d", + "phi;": "\u03c6", + "phiv;": "\u03d5", + "phmmat;": "\u2133", + "phone;": "\u260e", + "pi;": "\u03c0", + "pitchfork;": "\u22d4", + "piv;": "\u03d6", + "planck;": "\u210f", + "planckh;": "\u210e", + "plankv;": "\u210f", + "plus;": "+", + "plusacir;": "\u2a23", + "plusb;": "\u229e", + "pluscir;": "\u2a22", + "plusdo;": "\u2214", + "plusdu;": "\u2a25", + "pluse;": "\u2a72", + "plusmn": "\xb1", + "plusmn;": "\xb1", + "plussim;": "\u2a26", + "plustwo;": "\u2a27", + "pm;": "\xb1", + "pointint;": "\u2a15", + "popf;": "\U0001d561", + "pound": "\xa3", + "pound;": "\xa3", + "pr;": "\u227a", + "prE;": "\u2ab3", + "prap;": "\u2ab7", + "prcue;": "\u227c", + "pre;": "\u2aaf", + "prec;": "\u227a", + "precapprox;": "\u2ab7", + "preccurlyeq;": "\u227c", + "preceq;": "\u2aaf", + "precnapprox;": "\u2ab9", + "precneqq;": "\u2ab5", + "precnsim;": "\u22e8", + "precsim;": "\u227e", + "prime;": "\u2032", + "primes;": "\u2119", + "prnE;": "\u2ab5", + "prnap;": "\u2ab9", + "prnsim;": "\u22e8", + "prod;": "\u220f", + "profalar;": "\u232e", + "profline;": "\u2312", + "profsurf;": "\u2313", + "prop;": "\u221d", + "propto;": "\u221d", + "prsim;": "\u227e", + "prurel;": "\u22b0", + "pscr;": "\U0001d4c5", + "psi;": "\u03c8", + "puncsp;": "\u2008", + "qfr;": "\U0001d52e", + "qint;": "\u2a0c", + "qopf;": "\U0001d562", + "qprime;": "\u2057", + "qscr;": "\U0001d4c6", + "quaternions;": "\u210d", + "quatint;": "\u2a16", + "quest;": "?", + "questeq;": "\u225f", + "quot": "\"", + "quot;": "\"", + "rAarr;": "\u21db", + "rArr;": "\u21d2", + "rAtail;": "\u291c", + "rBarr;": "\u290f", + "rHar;": "\u2964", + "race;": "\u223d\u0331", + "racute;": "\u0155", + "radic;": "\u221a", + "raemptyv;": "\u29b3", + "rang;": "\u27e9", + "rangd;": "\u2992", + "range;": "\u29a5", + "rangle;": "\u27e9", + "raquo": "\xbb", + "raquo;": "\xbb", + "rarr;": "\u2192", + "rarrap;": "\u2975", + "rarrb;": "\u21e5", + "rarrbfs;": "\u2920", + "rarrc;": "\u2933", + "rarrfs;": "\u291e", + "rarrhk;": "\u21aa", + "rarrlp;": "\u21ac", + "rarrpl;": "\u2945", + "rarrsim;": "\u2974", + "rarrtl;": "\u21a3", + "rarrw;": "\u219d", + "ratail;": "\u291a", + "ratio;": "\u2236", + "rationals;": "\u211a", + "rbarr;": "\u290d", + "rbbrk;": "\u2773", + "rbrace;": "}", + "rbrack;": "]", + "rbrke;": "\u298c", + "rbrksld;": "\u298e", + "rbrkslu;": "\u2990", + "rcaron;": "\u0159", + "rcedil;": "\u0157", + "rceil;": "\u2309", + "rcub;": "}", + "rcy;": "\u0440", + "rdca;": "\u2937", + "rdldhar;": "\u2969", + "rdquo;": "\u201d", + "rdquor;": "\u201d", + "rdsh;": "\u21b3", + "real;": "\u211c", + "realine;": "\u211b", + "realpart;": "\u211c", + "reals;": "\u211d", + "rect;": "\u25ad", + "reg": "\xae", + "reg;": "\xae", + "rfisht;": "\u297d", + "rfloor;": "\u230b", + "rfr;": "\U0001d52f", + "rhard;": "\u21c1", + "rharu;": "\u21c0", + "rharul;": "\u296c", + "rho;": "\u03c1", + "rhov;": "\u03f1", + "rightarrow;": "\u2192", + "rightarrowtail;": "\u21a3", + "rightharpoondown;": "\u21c1", + "rightharpoonup;": "\u21c0", + "rightleftarrows;": "\u21c4", + "rightleftharpoons;": "\u21cc", + "rightrightarrows;": "\u21c9", + "rightsquigarrow;": "\u219d", + "rightthreetimes;": "\u22cc", + "ring;": "\u02da", + "risingdotseq;": "\u2253", + "rlarr;": "\u21c4", + "rlhar;": "\u21cc", + "rlm;": "\u200f", + "rmoust;": "\u23b1", + "rmoustache;": "\u23b1", + "rnmid;": "\u2aee", + "roang;": "\u27ed", + "roarr;": "\u21fe", + "robrk;": "\u27e7", + "ropar;": "\u2986", + "ropf;": "\U0001d563", + "roplus;": "\u2a2e", + "rotimes;": "\u2a35", + "rpar;": ")", + "rpargt;": "\u2994", + "rppolint;": "\u2a12", + "rrarr;": "\u21c9", + "rsaquo;": "\u203a", + "rscr;": "\U0001d4c7", + "rsh;": "\u21b1", + "rsqb;": "]", + "rsquo;": "\u2019", + "rsquor;": "\u2019", + "rthree;": "\u22cc", + "rtimes;": "\u22ca", + "rtri;": "\u25b9", + "rtrie;": "\u22b5", + "rtrif;": "\u25b8", + "rtriltri;": "\u29ce", + "ruluhar;": "\u2968", + "rx;": "\u211e", + "sacute;": "\u015b", + "sbquo;": "\u201a", + "sc;": "\u227b", + "scE;": "\u2ab4", + "scap;": "\u2ab8", + "scaron;": "\u0161", + "sccue;": "\u227d", + "sce;": "\u2ab0", + "scedil;": "\u015f", + "scirc;": "\u015d", + "scnE;": "\u2ab6", + "scnap;": "\u2aba", + "scnsim;": "\u22e9", + "scpolint;": "\u2a13", + "scsim;": "\u227f", + "scy;": "\u0441", + "sdot;": "\u22c5", + "sdotb;": "\u22a1", + "sdote;": "\u2a66", + "seArr;": "\u21d8", + "searhk;": "\u2925", + "searr;": "\u2198", + "searrow;": "\u2198", + "sect": "\xa7", + "sect;": "\xa7", + "semi;": ";", + "seswar;": "\u2929", + "setminus;": "\u2216", + "setmn;": "\u2216", + "sext;": "\u2736", + "sfr;": "\U0001d530", + "sfrown;": "\u2322", + "sharp;": "\u266f", + "shchcy;": "\u0449", + "shcy;": "\u0448", + "shortmid;": "\u2223", + "shortparallel;": "\u2225", + "shy": "\xad", + "shy;": "\xad", + "sigma;": "\u03c3", + "sigmaf;": "\u03c2", + "sigmav;": "\u03c2", + "sim;": "\u223c", + "simdot;": "\u2a6a", + "sime;": "\u2243", + "simeq;": "\u2243", + "simg;": "\u2a9e", + "simgE;": "\u2aa0", + "siml;": "\u2a9d", + "simlE;": "\u2a9f", + "simne;": "\u2246", + "simplus;": "\u2a24", + "simrarr;": "\u2972", + "slarr;": "\u2190", + "smallsetminus;": "\u2216", + "smashp;": "\u2a33", + "smeparsl;": "\u29e4", + "smid;": "\u2223", + "smile;": "\u2323", + "smt;": "\u2aaa", + "smte;": "\u2aac", + "smtes;": "\u2aac\ufe00", + "softcy;": "\u044c", + "sol;": "/", + "solb;": "\u29c4", + "solbar;": "\u233f", + "sopf;": "\U0001d564", + "spades;": "\u2660", + "spadesuit;": "\u2660", + "spar;": "\u2225", + "sqcap;": "\u2293", + "sqcaps;": "\u2293\ufe00", + "sqcup;": "\u2294", + "sqcups;": "\u2294\ufe00", + "sqsub;": "\u228f", + "sqsube;": "\u2291", + "sqsubset;": "\u228f", + "sqsubseteq;": "\u2291", + "sqsup;": "\u2290", + "sqsupe;": "\u2292", + "sqsupset;": "\u2290", + "sqsupseteq;": "\u2292", + "squ;": "\u25a1", + "square;": "\u25a1", + "squarf;": "\u25aa", + "squf;": "\u25aa", + "srarr;": "\u2192", + "sscr;": "\U0001d4c8", + "ssetmn;": "\u2216", + "ssmile;": "\u2323", + "sstarf;": "\u22c6", + "star;": "\u2606", + "starf;": "\u2605", + "straightepsilon;": "\u03f5", + "straightphi;": "\u03d5", + "strns;": "\xaf", + "sub;": "\u2282", + "subE;": "\u2ac5", + "subdot;": "\u2abd", + "sube;": "\u2286", + "subedot;": "\u2ac3", + "submult;": "\u2ac1", + "subnE;": "\u2acb", + "subne;": "\u228a", + "subplus;": "\u2abf", + "subrarr;": "\u2979", + "subset;": "\u2282", + "subseteq;": "\u2286", + "subseteqq;": "\u2ac5", + "subsetneq;": "\u228a", + "subsetneqq;": "\u2acb", + "subsim;": "\u2ac7", + "subsub;": "\u2ad5", + "subsup;": "\u2ad3", + "succ;": "\u227b", + "succapprox;": "\u2ab8", + "succcurlyeq;": "\u227d", + "succeq;": "\u2ab0", + "succnapprox;": "\u2aba", + "succneqq;": "\u2ab6", + "succnsim;": "\u22e9", + "succsim;": "\u227f", + "sum;": "\u2211", + "sung;": "\u266a", + "sup1": "\xb9", + "sup1;": "\xb9", + "sup2": "\xb2", + "sup2;": "\xb2", + "sup3": "\xb3", + "sup3;": "\xb3", + "sup;": "\u2283", + "supE;": "\u2ac6", + "supdot;": "\u2abe", + "supdsub;": "\u2ad8", + "supe;": "\u2287", + "supedot;": "\u2ac4", + "suphsol;": "\u27c9", + "suphsub;": "\u2ad7", + "suplarr;": "\u297b", + "supmult;": "\u2ac2", + "supnE;": "\u2acc", + "supne;": "\u228b", + "supplus;": "\u2ac0", + "supset;": "\u2283", + "supseteq;": "\u2287", + "supseteqq;": "\u2ac6", + "supsetneq;": "\u228b", + "supsetneqq;": "\u2acc", + "supsim;": "\u2ac8", + "supsub;": "\u2ad4", + "supsup;": "\u2ad6", + "swArr;": "\u21d9", + "swarhk;": "\u2926", + "swarr;": "\u2199", + "swarrow;": "\u2199", + "swnwar;": "\u292a", + "szlig": "\xdf", + "szlig;": "\xdf", + "target;": "\u2316", + "tau;": "\u03c4", + "tbrk;": "\u23b4", + "tcaron;": "\u0165", + "tcedil;": "\u0163", + "tcy;": "\u0442", + "tdot;": "\u20db", + "telrec;": "\u2315", + "tfr;": "\U0001d531", + "there4;": "\u2234", + "therefore;": "\u2234", + "theta;": "\u03b8", + "thetasym;": "\u03d1", + "thetav;": "\u03d1", + "thickapprox;": "\u2248", + "thicksim;": "\u223c", + "thinsp;": "\u2009", + "thkap;": "\u2248", + "thksim;": "\u223c", + "thorn": "\xfe", + "thorn;": "\xfe", + "tilde;": "\u02dc", + "times": "\xd7", + "times;": "\xd7", + "timesb;": "\u22a0", + "timesbar;": "\u2a31", + "timesd;": "\u2a30", + "tint;": "\u222d", + "toea;": "\u2928", + "top;": "\u22a4", + "topbot;": "\u2336", + "topcir;": "\u2af1", + "topf;": "\U0001d565", + "topfork;": "\u2ada", + "tosa;": "\u2929", + "tprime;": "\u2034", + "trade;": "\u2122", + "triangle;": "\u25b5", + "triangledown;": "\u25bf", + "triangleleft;": "\u25c3", + "trianglelefteq;": "\u22b4", + "triangleq;": "\u225c", + "triangleright;": "\u25b9", + "trianglerighteq;": "\u22b5", + "tridot;": "\u25ec", + "trie;": "\u225c", + "triminus;": "\u2a3a", + "triplus;": "\u2a39", + "trisb;": "\u29cd", + "tritime;": "\u2a3b", + "trpezium;": "\u23e2", + "tscr;": "\U0001d4c9", + "tscy;": "\u0446", + "tshcy;": "\u045b", + "tstrok;": "\u0167", + "twixt;": "\u226c", + "twoheadleftarrow;": "\u219e", + "twoheadrightarrow;": "\u21a0", + "uArr;": "\u21d1", + "uHar;": "\u2963", + "uacute": "\xfa", + "uacute;": "\xfa", + "uarr;": "\u2191", + "ubrcy;": "\u045e", + "ubreve;": "\u016d", + "ucirc": "\xfb", + "ucirc;": "\xfb", + "ucy;": "\u0443", + "udarr;": "\u21c5", + "udblac;": "\u0171", + "udhar;": "\u296e", + "ufisht;": "\u297e", + "ufr;": "\U0001d532", + "ugrave": "\xf9", + "ugrave;": "\xf9", + "uharl;": "\u21bf", + "uharr;": "\u21be", + "uhblk;": "\u2580", + "ulcorn;": "\u231c", + "ulcorner;": "\u231c", + "ulcrop;": "\u230f", + "ultri;": "\u25f8", + "umacr;": "\u016b", + "uml": "\xa8", + "uml;": "\xa8", + "uogon;": "\u0173", + "uopf;": "\U0001d566", + "uparrow;": "\u2191", + "updownarrow;": "\u2195", + "upharpoonleft;": "\u21bf", + "upharpoonright;": "\u21be", + "uplus;": "\u228e", + "upsi;": "\u03c5", + "upsih;": "\u03d2", + "upsilon;": "\u03c5", + "upuparrows;": "\u21c8", + "urcorn;": "\u231d", + "urcorner;": "\u231d", + "urcrop;": "\u230e", + "uring;": "\u016f", + "urtri;": "\u25f9", + "uscr;": "\U0001d4ca", + "utdot;": "\u22f0", + "utilde;": "\u0169", + "utri;": "\u25b5", + "utrif;": "\u25b4", + "uuarr;": "\u21c8", + "uuml": "\xfc", + "uuml;": "\xfc", + "uwangle;": "\u29a7", + "vArr;": "\u21d5", + "vBar;": "\u2ae8", + "vBarv;": "\u2ae9", + "vDash;": "\u22a8", + "vangrt;": "\u299c", + "varepsilon;": "\u03f5", + "varkappa;": "\u03f0", + "varnothing;": "\u2205", + "varphi;": "\u03d5", + "varpi;": "\u03d6", + "varpropto;": "\u221d", + "varr;": "\u2195", + "varrho;": "\u03f1", + "varsigma;": "\u03c2", + "varsubsetneq;": "\u228a\ufe00", + "varsubsetneqq;": "\u2acb\ufe00", + "varsupsetneq;": "\u228b\ufe00", + "varsupsetneqq;": "\u2acc\ufe00", + "vartheta;": "\u03d1", + "vartriangleleft;": "\u22b2", + "vartriangleright;": "\u22b3", + "vcy;": "\u0432", + "vdash;": "\u22a2", + "vee;": "\u2228", + "veebar;": "\u22bb", + "veeeq;": "\u225a", + "vellip;": "\u22ee", + "verbar;": "|", + "vert;": "|", + "vfr;": "\U0001d533", + "vltri;": "\u22b2", + "vnsub;": "\u2282\u20d2", + "vnsup;": "\u2283\u20d2", + "vopf;": "\U0001d567", + "vprop;": "\u221d", + "vrtri;": "\u22b3", + "vscr;": "\U0001d4cb", + "vsubnE;": "\u2acb\ufe00", + "vsubne;": "\u228a\ufe00", + "vsupnE;": "\u2acc\ufe00", + "vsupne;": "\u228b\ufe00", + "vzigzag;": "\u299a", + "wcirc;": "\u0175", + "wedbar;": "\u2a5f", + "wedge;": "\u2227", + "wedgeq;": "\u2259", + "weierp;": "\u2118", + "wfr;": "\U0001d534", + "wopf;": "\U0001d568", + "wp;": "\u2118", + "wr;": "\u2240", + "wreath;": "\u2240", + "wscr;": "\U0001d4cc", + "xcap;": "\u22c2", + "xcirc;": "\u25ef", + "xcup;": "\u22c3", + "xdtri;": "\u25bd", + "xfr;": "\U0001d535", + "xhArr;": "\u27fa", + "xharr;": "\u27f7", + "xi;": "\u03be", + "xlArr;": "\u27f8", + "xlarr;": "\u27f5", + "xmap;": "\u27fc", + "xnis;": "\u22fb", + "xodot;": "\u2a00", + "xopf;": "\U0001d569", + "xoplus;": "\u2a01", + "xotime;": "\u2a02", + "xrArr;": "\u27f9", + "xrarr;": "\u27f6", + "xscr;": "\U0001d4cd", + "xsqcup;": "\u2a06", + "xuplus;": "\u2a04", + "xutri;": "\u25b3", + "xvee;": "\u22c1", + "xwedge;": "\u22c0", + "yacute": "\xfd", + "yacute;": "\xfd", + "yacy;": "\u044f", + "ycirc;": "\u0177", + "ycy;": "\u044b", + "yen": "\xa5", + "yen;": "\xa5", + "yfr;": "\U0001d536", + "yicy;": "\u0457", + "yopf;": "\U0001d56a", + "yscr;": "\U0001d4ce", + "yucy;": "\u044e", + "yuml": "\xff", + "yuml;": "\xff", + "zacute;": "\u017a", + "zcaron;": "\u017e", + "zcy;": "\u0437", + "zdot;": "\u017c", + "zeetrf;": "\u2128", + "zeta;": "\u03b6", + "zfr;": "\U0001d537", + "zhcy;": "\u0436", + "zigrarr;": "\u21dd", + "zopf;": "\U0001d56b", + "zscr;": "\U0001d4cf", + "zwj;": "\u200d", + "zwnj;": "\u200c", +} + +replacementCharacters = { + 0x0: "\uFFFD", + 0x0d: "\u000D", + 0x80: "\u20AC", + 0x81: "\u0081", + 0x82: "\u201A", + 0x83: "\u0192", + 0x84: "\u201E", + 0x85: "\u2026", + 0x86: "\u2020", + 0x87: "\u2021", + 0x88: "\u02C6", + 0x89: "\u2030", + 0x8A: "\u0160", + 0x8B: "\u2039", + 0x8C: "\u0152", + 0x8D: "\u008D", + 0x8E: "\u017D", + 0x8F: "\u008F", + 0x90: "\u0090", + 0x91: "\u2018", + 0x92: "\u2019", + 0x93: "\u201C", + 0x94: "\u201D", + 0x95: "\u2022", + 0x96: "\u2013", + 0x97: "\u2014", + 0x98: "\u02DC", + 0x99: "\u2122", + 0x9A: "\u0161", + 0x9B: "\u203A", + 0x9C: "\u0153", + 0x9D: "\u009D", + 0x9E: "\u017E", + 0x9F: "\u0178", +} + +tokenTypes = { + "Doctype": 0, + "Characters": 1, + "SpaceCharacters": 2, + "StartTag": 3, + "EndTag": 4, + "EmptyTag": 5, + "Comment": 6, + "ParseError": 7 +} + +tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"]]) + + +prefixes = dict([(v, k) for k, v in namespaces.items()]) +prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + + +class DataLossWarning(UserWarning): + """Raised when the current tree is unable to represent the input data""" + pass + + +class _ReparseException(Exception): + pass diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__init__.py diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..742a8027 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..6f9a0050 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/alphabeticalattributes.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..75c79173 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/base.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..e98a89f6 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/inject_meta_charset.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..63b7ddb5 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/lint.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..c6090666 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/optionaltags.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..985e92e0 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/sanitizer.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..86dc6bd7 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/__pycache__/whitespace.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 00000000..5ba926e3 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,29 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import base + +from collections import OrderedDict + + +def _attr_key(attr): + """Return an appropriate key for an attribute for sorting + + Attributes have a namespace that can be either ``None`` or a string. We + can't compare the two because they're different types, so we convert + ``None`` to an empty string first. + + """ + return (attr[0][0] or ''), attr[0][1] + + +class Filter(base.Filter): + """Alphabetizes attributes for elements""" + def __iter__(self): + for token in base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=_attr_key): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/base.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/base.py new file mode 100644 index 00000000..c7dbaed0 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/base.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + + +class Filter(object): + def __init__(self, source): + self.source = source + + def __iter__(self): + return iter(self.source) + + def __getattr__(self, name): + return getattr(self.source, name) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py new file mode 100644 index 00000000..aefb5c84 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import base + + +class Filter(base.Filter): + """Injects ``<meta charset=ENCODING>`` tag into head of document""" + def __init__(self, source, encoding): + """Creates a Filter + + :arg source: the source token stream + + :arg encoding: the encoding to set + + """ + base.Filter.__init__(self, source) + self.encoding = encoding + + def __iter__(self): + state = "pre_head" + meta_found = (self.encoding is None) + pending = [] + + for token in base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag": + if token["name"].lower() == "head": + state = "in_head" + + elif type == "EmptyTag": + if token["name"].lower() == "meta": + # replace charset with actual encoding + has_http_equiv_content_type = False + for (namespace, name), value in token["data"].items(): + if namespace is not None: + continue + elif name.lower() == 'charset': + token["data"][(namespace, name)] = self.encoding + meta_found = True + break + elif name == 'http-equiv' and value.lower() == 'content-type': + has_http_equiv_content_type = True + else: + if has_http_equiv_content_type and (None, "content") in token["data"]: + token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding + meta_found = True + + elif token["name"].lower() == "head" and not meta_found: + # insert meta into empty head + yield {"type": "StartTag", "name": "head", + "data": token["data"]} + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + yield {"type": "EndTag", "name": "head"} + meta_found = True + continue + + elif type == "EndTag": + if token["name"].lower() == "head" and pending: + # insert meta into head (if necessary) and flush pending queue + yield pending.pop(0) + if not meta_found: + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + while pending: + yield pending.pop(0) + meta_found = True + state = "post_head" + + if state == "in_head": + pending.append(token) + else: + yield token diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/lint.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/lint.py new file mode 100644 index 00000000..fcc07eec --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/lint.py @@ -0,0 +1,93 @@ +from __future__ import absolute_import, division, unicode_literals + +from pip._vendor.six import text_type + +from . import base +from ..constants import namespaces, voidElements + +from ..constants import spaceCharacters +spaceCharacters = "".join(spaceCharacters) + + +class Filter(base.Filter): + """Lints the token stream for errors + + If it finds any errors, it'll raise an ``AssertionError``. + + """ + def __init__(self, source, require_matching_tags=True): + """Creates a Filter + + :arg source: the source token stream + + :arg require_matching_tags: whether or not to require matching tags + + """ + super(Filter, self).__init__(source) + self.require_matching_tags = require_matching_tags + + def __iter__(self): + open_elements = [] + for token in base.Filter.__iter__(self): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + namespace = token["namespace"] + name = token["name"] + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(token["data"], dict) + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert type == "EmptyTag" + else: + assert type == "StartTag" + if type == "StartTag" and self.require_matching_tags: + open_elements.append((namespace, name)) + for (namespace, name), value in token["data"].items(): + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + assert isinstance(value, text_type) + + elif type == "EndTag": + namespace = token["namespace"] + name = token["name"] + assert namespace is None or isinstance(namespace, text_type) + assert namespace != "" + assert isinstance(name, text_type) + assert name != "" + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + elif self.require_matching_tags: + start = open_elements.pop() + assert start == (namespace, name) + + elif type == "Comment": + data = token["data"] + assert isinstance(data, text_type) + + elif type in ("Characters", "SpaceCharacters"): + data = token["data"] + assert isinstance(data, text_type) + assert data != "" + if type == "SpaceCharacters": + assert data.strip(spaceCharacters) == "" + + elif type == "Doctype": + name = token["name"] + assert name is None or isinstance(name, text_type) + assert token["publicId"] is None or isinstance(name, text_type) + assert token["systemId"] is None or isinstance(name, text_type) + + elif type == "Entity": + assert isinstance(token["name"], text_type) + + elif type == "SerializerError": + assert isinstance(token["data"], text_type) + + else: + assert False, "Unknown token type: %(type)s" % {"type": type} + + yield token diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/optionaltags.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/optionaltags.py new file mode 100644 index 00000000..4a865012 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/optionaltags.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import base + + +class Filter(base.Filter): + """Removes optional tags from the token stream""" + def slider(self): + previous1 = previous2 = None + for token in self.source: + if previous1 is not None: + yield previous2, previous1, token + previous2 = previous1 + previous1 = token + if previous1 is not None: + yield previous2, previous1, None + + def __iter__(self): + for previous, token, next in self.slider(): + type = token["type"] + if type == "StartTag": + if (token["data"] or + not self.is_optional_start(token["name"], previous, next)): + yield token + elif type == "EndTag": + if not self.is_optional_end(token["name"], next): + yield token + else: + yield token + + def is_optional_start(self, tagname, previous, next): + type = next and next["type"] or None + if tagname in 'html': + # An html element's start tag may be omitted if the first thing + # inside the html element is not a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname == 'head': + # A head element's start tag may be omitted if the first thing + # inside the head element is an element. + # XXX: we also omit the start tag if the head element is empty + if type in ("StartTag", "EmptyTag"): + return True + elif type == "EndTag": + return next["name"] == "head" + elif tagname == 'body': + # A body element's start tag may be omitted if the first thing + # inside the body element is not a space character or a comment, + # except if the first thing inside the body element is a script + # or style element and the node immediately preceding the body + # element is a head element whose end tag has been omitted. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we do not look at the preceding event, so we never omit + # the body element's start tag if it's followed by a script or + # a style element. + return next["name"] not in ('script', 'style') + else: + return True + elif tagname == 'colgroup': + # A colgroup element's start tag may be omitted if the first thing + # inside the colgroup element is a col element, and if the element + # is not immediately preceded by another colgroup element whose + # end tag has been omitted. + if type in ("StartTag", "EmptyTag"): + # XXX: we do not look at the preceding event, so instead we never + # omit the colgroup element's end tag when it is immediately + # followed by another colgroup element. See is_optional_end. + return next["name"] == "col" + else: + return False + elif tagname == 'tbody': + # A tbody element's start tag may be omitted if the first thing + # inside the tbody element is a tr element, and if the element is + # not immediately preceded by a tbody, thead, or tfoot element + # whose end tag has been omitted. + if type == "StartTag": + # omit the thead and tfoot elements' end tag when they are + # immediately followed by a tbody element. See is_optional_end. + if previous and previous['type'] == 'EndTag' and \ + previous['name'] in ('tbody', 'thead', 'tfoot'): + return False + return next["name"] == 'tr' + else: + return False + return False + + def is_optional_end(self, tagname, next): + type = next and next["type"] or None + if tagname in ('html', 'head', 'body'): + # An html element's end tag may be omitted if the html element + # is not immediately followed by a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname in ('li', 'optgroup', 'tr'): + # A li element's end tag may be omitted if the li element is + # immediately followed by another li element or if there is + # no more content in the parent element. + # An optgroup element's end tag may be omitted if the optgroup + # element is immediately followed by another optgroup element, + # or if there is no more content in the parent element. + # A tr element's end tag may be omitted if the tr element is + # immediately followed by another tr element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] == tagname + else: + return type == "EndTag" or type is None + elif tagname in ('dt', 'dd'): + # A dt element's end tag may be omitted if the dt element is + # immediately followed by another dt element or a dd element. + # A dd element's end tag may be omitted if the dd element is + # immediately followed by another dd element or a dt element, + # or if there is no more content in the parent element. + if type == "StartTag": + return next["name"] in ('dt', 'dd') + elif tagname == 'dd': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'p': + # A p element's end tag may be omitted if the p element is + # immediately followed by an address, article, aside, + # blockquote, datagrid, dialog, dir, div, dl, fieldset, + # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, + # nav, ol, p, pre, section, table, or ul, element, or if + # there is no more content in the parent element. + if type in ("StartTag", "EmptyTag"): + return next["name"] in ('address', 'article', 'aside', + 'blockquote', 'datagrid', 'dialog', + 'dir', 'div', 'dl', 'fieldset', 'footer', + 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'header', 'hr', 'menu', 'nav', 'ol', + 'p', 'pre', 'section', 'table', 'ul') + else: + return type == "EndTag" or type is None + elif tagname == 'option': + # An option element's end tag may be omitted if the option + # element is immediately followed by another option element, + # or if it is immediately followed by an <code>optgroup</code> + # element, or if there is no more content in the parent + # element. + if type == "StartTag": + return next["name"] in ('option', 'optgroup') + else: + return type == "EndTag" or type is None + elif tagname in ('rt', 'rp'): + # An rt element's end tag may be omitted if the rt element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + # An rp element's end tag may be omitted if the rp element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('rt', 'rp') + else: + return type == "EndTag" or type is None + elif tagname == 'colgroup': + # A colgroup element's end tag may be omitted if the colgroup + # element is not immediately followed by a space character or + # a comment. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we also look for an immediately following colgroup + # element. See is_optional_start. + return next["name"] != 'colgroup' + else: + return True + elif tagname in ('thead', 'tbody'): + # A thead element's end tag may be omitted if the thead element + # is immediately followed by a tbody or tfoot element. + # A tbody element's end tag may be omitted if the tbody element + # is immediately followed by a tbody or tfoot element, or if + # there is no more content in the parent element. + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] in ['tbody', 'tfoot'] + elif tagname == 'tbody': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'tfoot': + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] == 'tbody' + else: + return type == "EndTag" or type is None + elif tagname in ('td', 'th'): + # A td element's end tag may be omitted if the td element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + # A th element's end tag may be omitted if the th element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('td', 'th') + else: + return type == "EndTag" or type is None + return False diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/sanitizer.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/sanitizer.py new file mode 100644 index 00000000..af8e77b8 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/sanitizer.py @@ -0,0 +1,896 @@ +from __future__ import absolute_import, division, unicode_literals + +import re +from xml.sax.saxutils import escape, unescape + +from pip._vendor.six.moves import urllib_parse as urlparse + +from . import base +from ..constants import namespaces, prefixes + +__all__ = ["Filter"] + + +allowed_elements = frozenset(( + (namespaces['html'], 'a'), + (namespaces['html'], 'abbr'), + (namespaces['html'], 'acronym'), + (namespaces['html'], 'address'), + (namespaces['html'], 'area'), + (namespaces['html'], 'article'), + (namespaces['html'], 'aside'), + (namespaces['html'], 'audio'), + (namespaces['html'], 'b'), + (namespaces['html'], 'big'), + (namespaces['html'], 'blockquote'), + (namespaces['html'], 'br'), + (namespaces['html'], 'button'), + (namespaces['html'], 'canvas'), + (namespaces['html'], 'caption'), + (namespaces['html'], 'center'), + (namespaces['html'], 'cite'), + (namespaces['html'], 'code'), + (namespaces['html'], 'col'), + (namespaces['html'], 'colgroup'), + (namespaces['html'], 'command'), + (namespaces['html'], 'datagrid'), + (namespaces['html'], 'datalist'), + (namespaces['html'], 'dd'), + (namespaces['html'], 'del'), + (namespaces['html'], 'details'), + (namespaces['html'], 'dfn'), + (namespaces['html'], 'dialog'), + (namespaces['html'], 'dir'), + (namespaces['html'], 'div'), + (namespaces['html'], 'dl'), + (namespaces['html'], 'dt'), + (namespaces['html'], 'em'), + (namespaces['html'], 'event-source'), + (namespaces['html'], 'fieldset'), + (namespaces['html'], 'figcaption'), + (namespaces['html'], 'figure'), + (namespaces['html'], 'footer'), + (namespaces['html'], 'font'), + (namespaces['html'], 'form'), + (namespaces['html'], 'header'), + (namespaces['html'], 'h1'), + (namespaces['html'], 'h2'), + (namespaces['html'], 'h3'), + (namespaces['html'], 'h4'), + (namespaces['html'], 'h5'), + (namespaces['html'], 'h6'), + (namespaces['html'], 'hr'), + (namespaces['html'], 'i'), + (namespaces['html'], 'img'), + (namespaces['html'], 'input'), + (namespaces['html'], 'ins'), + (namespaces['html'], 'keygen'), + (namespaces['html'], 'kbd'), + (namespaces['html'], 'label'), + (namespaces['html'], 'legend'), + (namespaces['html'], 'li'), + (namespaces['html'], 'm'), + (namespaces['html'], 'map'), + (namespaces['html'], 'menu'), + (namespaces['html'], 'meter'), + (namespaces['html'], 'multicol'), + (namespaces['html'], 'nav'), + (namespaces['html'], 'nextid'), + (namespaces['html'], 'ol'), + (namespaces['html'], 'output'), + (namespaces['html'], 'optgroup'), + (namespaces['html'], 'option'), + (namespaces['html'], 'p'), + (namespaces['html'], 'pre'), + (namespaces['html'], 'progress'), + (namespaces['html'], 'q'), + (namespaces['html'], 's'), + (namespaces['html'], 'samp'), + (namespaces['html'], 'section'), + (namespaces['html'], 'select'), + (namespaces['html'], 'small'), + (namespaces['html'], 'sound'), + (namespaces['html'], 'source'), + (namespaces['html'], 'spacer'), + (namespaces['html'], 'span'), + (namespaces['html'], 'strike'), + (namespaces['html'], 'strong'), + (namespaces['html'], 'sub'), + (namespaces['html'], 'sup'), + (namespaces['html'], 'table'), + (namespaces['html'], 'tbody'), + (namespaces['html'], 'td'), + (namespaces['html'], 'textarea'), + (namespaces['html'], 'time'), + (namespaces['html'], 'tfoot'), + (namespaces['html'], 'th'), + (namespaces['html'], 'thead'), + (namespaces['html'], 'tr'), + (namespaces['html'], 'tt'), + (namespaces['html'], 'u'), + (namespaces['html'], 'ul'), + (namespaces['html'], 'var'), + (namespaces['html'], 'video'), + (namespaces['mathml'], 'maction'), + (namespaces['mathml'], 'math'), + (namespaces['mathml'], 'merror'), + (namespaces['mathml'], 'mfrac'), + (namespaces['mathml'], 'mi'), + (namespaces['mathml'], 'mmultiscripts'), + (namespaces['mathml'], 'mn'), + (namespaces['mathml'], 'mo'), + (namespaces['mathml'], 'mover'), + (namespaces['mathml'], 'mpadded'), + (namespaces['mathml'], 'mphantom'), + (namespaces['mathml'], 'mprescripts'), + (namespaces['mathml'], 'mroot'), + (namespaces['mathml'], 'mrow'), + (namespaces['mathml'], 'mspace'), + (namespaces['mathml'], 'msqrt'), + (namespaces['mathml'], 'mstyle'), + (namespaces['mathml'], 'msub'), + (namespaces['mathml'], 'msubsup'), + (namespaces['mathml'], 'msup'), + (namespaces['mathml'], 'mtable'), + (namespaces['mathml'], 'mtd'), + (namespaces['mathml'], 'mtext'), + (namespaces['mathml'], 'mtr'), + (namespaces['mathml'], 'munder'), + (namespaces['mathml'], 'munderover'), + (namespaces['mathml'], 'none'), + (namespaces['svg'], 'a'), + (namespaces['svg'], 'animate'), + (namespaces['svg'], 'animateColor'), + (namespaces['svg'], 'animateMotion'), + (namespaces['svg'], 'animateTransform'), + (namespaces['svg'], 'clipPath'), + (namespaces['svg'], 'circle'), + (namespaces['svg'], 'defs'), + (namespaces['svg'], 'desc'), + (namespaces['svg'], 'ellipse'), + (namespaces['svg'], 'font-face'), + (namespaces['svg'], 'font-face-name'), + (namespaces['svg'], 'font-face-src'), + (namespaces['svg'], 'g'), + (namespaces['svg'], 'glyph'), + (namespaces['svg'], 'hkern'), + (namespaces['svg'], 'linearGradient'), + (namespaces['svg'], 'line'), + (namespaces['svg'], 'marker'), + (namespaces['svg'], 'metadata'), + (namespaces['svg'], 'missing-glyph'), + (namespaces['svg'], 'mpath'), + (namespaces['svg'], 'path'), + (namespaces['svg'], 'polygon'), + (namespaces['svg'], 'polyline'), + (namespaces['svg'], 'radialGradient'), + (namespaces['svg'], 'rect'), + (namespaces['svg'], 'set'), + (namespaces['svg'], 'stop'), + (namespaces['svg'], 'svg'), + (namespaces['svg'], 'switch'), + (namespaces['svg'], 'text'), + (namespaces['svg'], 'title'), + (namespaces['svg'], 'tspan'), + (namespaces['svg'], 'use'), +)) + +allowed_attributes = frozenset(( + # HTML attributes + (None, 'abbr'), + (None, 'accept'), + (None, 'accept-charset'), + (None, 'accesskey'), + (None, 'action'), + (None, 'align'), + (None, 'alt'), + (None, 'autocomplete'), + (None, 'autofocus'), + (None, 'axis'), + (None, 'background'), + (None, 'balance'), + (None, 'bgcolor'), + (None, 'bgproperties'), + (None, 'border'), + (None, 'bordercolor'), + (None, 'bordercolordark'), + (None, 'bordercolorlight'), + (None, 'bottompadding'), + (None, 'cellpadding'), + (None, 'cellspacing'), + (None, 'ch'), + (None, 'challenge'), + (None, 'char'), + (None, 'charoff'), + (None, 'choff'), + (None, 'charset'), + (None, 'checked'), + (None, 'cite'), + (None, 'class'), + (None, 'clear'), + (None, 'color'), + (None, 'cols'), + (None, 'colspan'), + (None, 'compact'), + (None, 'contenteditable'), + (None, 'controls'), + (None, 'coords'), + (None, 'data'), + (None, 'datafld'), + (None, 'datapagesize'), + (None, 'datasrc'), + (None, 'datetime'), + (None, 'default'), + (None, 'delay'), + (None, 'dir'), + (None, 'disabled'), + (None, 'draggable'), + (None, 'dynsrc'), + (None, 'enctype'), + (None, 'end'), + (None, 'face'), + (None, 'for'), + (None, 'form'), + (None, 'frame'), + (None, 'galleryimg'), + (None, 'gutter'), + (None, 'headers'), + (None, 'height'), + (None, 'hidefocus'), + (None, 'hidden'), + (None, 'high'), + (None, 'href'), + (None, 'hreflang'), + (None, 'hspace'), + (None, 'icon'), + (None, 'id'), + (None, 'inputmode'), + (None, 'ismap'), + (None, 'keytype'), + (None, 'label'), + (None, 'leftspacing'), + (None, 'lang'), + (None, 'list'), + (None, 'longdesc'), + (None, 'loop'), + (None, 'loopcount'), + (None, 'loopend'), + (None, 'loopstart'), + (None, 'low'), + (None, 'lowsrc'), + (None, 'max'), + (None, 'maxlength'), + (None, 'media'), + (None, 'method'), + (None, 'min'), + (None, 'multiple'), + (None, 'name'), + (None, 'nohref'), + (None, 'noshade'), + (None, 'nowrap'), + (None, 'open'), + (None, 'optimum'), + (None, 'pattern'), + (None, 'ping'), + (None, 'point-size'), + (None, 'poster'), + (None, 'pqg'), + (None, 'preload'), + (None, 'prompt'), + (None, 'radiogroup'), + (None, 'readonly'), + (None, 'rel'), + (None, 'repeat-max'), + (None, 'repeat-min'), + (None, 'replace'), + (None, 'required'), + (None, 'rev'), + (None, 'rightspacing'), + (None, 'rows'), + (None, 'rowspan'), + (None, 'rules'), + (None, 'scope'), + (None, 'selected'), + (None, 'shape'), + (None, 'size'), + (None, 'span'), + (None, 'src'), + (None, 'start'), + (None, 'step'), + (None, 'style'), + (None, 'summary'), + (None, 'suppress'), + (None, 'tabindex'), + (None, 'target'), + (None, 'template'), + (None, 'title'), + (None, 'toppadding'), + (None, 'type'), + (None, 'unselectable'), + (None, 'usemap'), + (None, 'urn'), + (None, 'valign'), + (None, 'value'), + (None, 'variable'), + (None, 'volume'), + (None, 'vspace'), + (None, 'vrml'), + (None, 'width'), + (None, 'wrap'), + (namespaces['xml'], 'lang'), + # MathML attributes + (None, 'actiontype'), + (None, 'align'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnalign'), + (None, 'columnlines'), + (None, 'columnspacing'), + (None, 'columnspan'), + (None, 'depth'), + (None, 'display'), + (None, 'displaystyle'), + (None, 'equalcolumns'), + (None, 'equalrows'), + (None, 'fence'), + (None, 'fontstyle'), + (None, 'fontweight'), + (None, 'frame'), + (None, 'height'), + (None, 'linethickness'), + (None, 'lspace'), + (None, 'mathbackground'), + (None, 'mathcolor'), + (None, 'mathvariant'), + (None, 'mathvariant'), + (None, 'maxsize'), + (None, 'minsize'), + (None, 'other'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowalign'), + (None, 'rowlines'), + (None, 'rowspacing'), + (None, 'rowspan'), + (None, 'rspace'), + (None, 'scriptlevel'), + (None, 'selection'), + (None, 'separator'), + (None, 'stretchy'), + (None, 'width'), + (None, 'width'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'type'), + # SVG attributes + (None, 'accent-height'), + (None, 'accumulate'), + (None, 'additive'), + (None, 'alphabetic'), + (None, 'arabic-form'), + (None, 'ascent'), + (None, 'attributeName'), + (None, 'attributeType'), + (None, 'baseProfile'), + (None, 'bbox'), + (None, 'begin'), + (None, 'by'), + (None, 'calcMode'), + (None, 'cap-height'), + (None, 'class'), + (None, 'clip-path'), + (None, 'color'), + (None, 'color-rendering'), + (None, 'content'), + (None, 'cx'), + (None, 'cy'), + (None, 'd'), + (None, 'dx'), + (None, 'dy'), + (None, 'descent'), + (None, 'display'), + (None, 'dur'), + (None, 'end'), + (None, 'fill'), + (None, 'fill-opacity'), + (None, 'fill-rule'), + (None, 'font-family'), + (None, 'font-size'), + (None, 'font-stretch'), + (None, 'font-style'), + (None, 'font-variant'), + (None, 'font-weight'), + (None, 'from'), + (None, 'fx'), + (None, 'fy'), + (None, 'g1'), + (None, 'g2'), + (None, 'glyph-name'), + (None, 'gradientUnits'), + (None, 'hanging'), + (None, 'height'), + (None, 'horiz-adv-x'), + (None, 'horiz-origin-x'), + (None, 'id'), + (None, 'ideographic'), + (None, 'k'), + (None, 'keyPoints'), + (None, 'keySplines'), + (None, 'keyTimes'), + (None, 'lang'), + (None, 'marker-end'), + (None, 'marker-mid'), + (None, 'marker-start'), + (None, 'markerHeight'), + (None, 'markerUnits'), + (None, 'markerWidth'), + (None, 'mathematical'), + (None, 'max'), + (None, 'min'), + (None, 'name'), + (None, 'offset'), + (None, 'opacity'), + (None, 'orient'), + (None, 'origin'), + (None, 'overline-position'), + (None, 'overline-thickness'), + (None, 'panose-1'), + (None, 'path'), + (None, 'pathLength'), + (None, 'points'), + (None, 'preserveAspectRatio'), + (None, 'r'), + (None, 'refX'), + (None, 'refY'), + (None, 'repeatCount'), + (None, 'repeatDur'), + (None, 'requiredExtensions'), + (None, 'requiredFeatures'), + (None, 'restart'), + (None, 'rotate'), + (None, 'rx'), + (None, 'ry'), + (None, 'slope'), + (None, 'stemh'), + (None, 'stemv'), + (None, 'stop-color'), + (None, 'stop-opacity'), + (None, 'strikethrough-position'), + (None, 'strikethrough-thickness'), + (None, 'stroke'), + (None, 'stroke-dasharray'), + (None, 'stroke-dashoffset'), + (None, 'stroke-linecap'), + (None, 'stroke-linejoin'), + (None, 'stroke-miterlimit'), + (None, 'stroke-opacity'), + (None, 'stroke-width'), + (None, 'systemLanguage'), + (None, 'target'), + (None, 'text-anchor'), + (None, 'to'), + (None, 'transform'), + (None, 'type'), + (None, 'u1'), + (None, 'u2'), + (None, 'underline-position'), + (None, 'underline-thickness'), + (None, 'unicode'), + (None, 'unicode-range'), + (None, 'units-per-em'), + (None, 'values'), + (None, 'version'), + (None, 'viewBox'), + (None, 'visibility'), + (None, 'width'), + (None, 'widths'), + (None, 'x'), + (None, 'x-height'), + (None, 'x1'), + (None, 'x2'), + (namespaces['xlink'], 'actuate'), + (namespaces['xlink'], 'arcrole'), + (namespaces['xlink'], 'href'), + (namespaces['xlink'], 'role'), + (namespaces['xlink'], 'show'), + (namespaces['xlink'], 'title'), + (namespaces['xlink'], 'type'), + (namespaces['xml'], 'base'), + (namespaces['xml'], 'lang'), + (namespaces['xml'], 'space'), + (None, 'y'), + (None, 'y1'), + (None, 'y2'), + (None, 'zoomAndPan'), +)) + +attr_val_is_uri = frozenset(( + (None, 'href'), + (None, 'src'), + (None, 'cite'), + (None, 'action'), + (None, 'longdesc'), + (None, 'poster'), + (None, 'background'), + (None, 'datasrc'), + (None, 'dynsrc'), + (None, 'lowsrc'), + (None, 'ping'), + (namespaces['xlink'], 'href'), + (namespaces['xml'], 'base'), +)) + +svg_attr_val_allows_ref = frozenset(( + (None, 'clip-path'), + (None, 'color-profile'), + (None, 'cursor'), + (None, 'fill'), + (None, 'filter'), + (None, 'marker'), + (None, 'marker-start'), + (None, 'marker-mid'), + (None, 'marker-end'), + (None, 'mask'), + (None, 'stroke'), +)) + +svg_allow_local_href = frozenset(( + (None, 'altGlyph'), + (None, 'animate'), + (None, 'animateColor'), + (None, 'animateMotion'), + (None, 'animateTransform'), + (None, 'cursor'), + (None, 'feImage'), + (None, 'filter'), + (None, 'linearGradient'), + (None, 'pattern'), + (None, 'radialGradient'), + (None, 'textpath'), + (None, 'tref'), + (None, 'set'), + (None, 'use') +)) + +allowed_css_properties = frozenset(( + 'azimuth', + 'background-color', + 'border-bottom-color', + 'border-collapse', + 'border-color', + 'border-left-color', + 'border-right-color', + 'border-top-color', + 'clear', + 'color', + 'cursor', + 'direction', + 'display', + 'elevation', + 'float', + 'font', + 'font-family', + 'font-size', + 'font-style', + 'font-variant', + 'font-weight', + 'height', + 'letter-spacing', + 'line-height', + 'overflow', + 'pause', + 'pause-after', + 'pause-before', + 'pitch', + 'pitch-range', + 'richness', + 'speak', + 'speak-header', + 'speak-numeral', + 'speak-punctuation', + 'speech-rate', + 'stress', + 'text-align', + 'text-decoration', + 'text-indent', + 'unicode-bidi', + 'vertical-align', + 'voice-family', + 'volume', + 'white-space', + 'width', +)) + +allowed_css_keywords = frozenset(( + 'auto', + 'aqua', + 'black', + 'block', + 'blue', + 'bold', + 'both', + 'bottom', + 'brown', + 'center', + 'collapse', + 'dashed', + 'dotted', + 'fuchsia', + 'gray', + 'green', + '!important', + 'italic', + 'left', + 'lime', + 'maroon', + 'medium', + 'none', + 'navy', + 'normal', + 'nowrap', + 'olive', + 'pointer', + 'purple', + 'red', + 'right', + 'solid', + 'silver', + 'teal', + 'top', + 'transparent', + 'underline', + 'white', + 'yellow', +)) + +allowed_svg_properties = frozenset(( + 'fill', + 'fill-opacity', + 'fill-rule', + 'stroke', + 'stroke-width', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-opacity', +)) + +allowed_protocols = frozenset(( + 'ed2k', + 'ftp', + 'http', + 'https', + 'irc', + 'mailto', + 'news', + 'gopher', + 'nntp', + 'telnet', + 'webcal', + 'xmpp', + 'callto', + 'feed', + 'urn', + 'aim', + 'rsync', + 'tag', + 'ssh', + 'sftp', + 'rtsp', + 'afs', + 'data', +)) + +allowed_content_types = frozenset(( + 'image/png', + 'image/jpeg', + 'image/gif', + 'image/webp', + 'image/bmp', + 'text/plain', +)) + + +data_content_type = re.compile(r''' + ^ + # Match a content type <application>/<type> + (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) + # Match any character set and encoding + (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) + |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) + # Assume the rest is data + ,.* + $ + ''', + re.VERBOSE) + + +class Filter(base.Filter): + """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" + def __init__(self, + source, + allowed_elements=allowed_elements, + allowed_attributes=allowed_attributes, + allowed_css_properties=allowed_css_properties, + allowed_css_keywords=allowed_css_keywords, + allowed_svg_properties=allowed_svg_properties, + allowed_protocols=allowed_protocols, + allowed_content_types=allowed_content_types, + attr_val_is_uri=attr_val_is_uri, + svg_attr_val_allows_ref=svg_attr_val_allows_ref, + svg_allow_local_href=svg_allow_local_href): + """Creates a Filter + + :arg allowed_elements: set of elements to allow--everything else will + be escaped + + :arg allowed_attributes: set of attributes to allow in + elements--everything else will be stripped + + :arg allowed_css_properties: set of CSS properties to allow--everything + else will be stripped + + :arg allowed_css_keywords: set of CSS keywords to allow--everything + else will be stripped + + :arg allowed_svg_properties: set of SVG properties to allow--everything + else will be removed + + :arg allowed_protocols: set of allowed protocols for URIs + + :arg allowed_content_types: set of allowed content types for ``data`` URIs. + + :arg attr_val_is_uri: set of attributes that have URI values--values + that have a scheme not listed in ``allowed_protocols`` are removed + + :arg svg_attr_val_allows_ref: set of SVG attributes that can have + references + + :arg svg_allow_local_href: set of SVG elements that can have local + hrefs--these are removed + + """ + super(Filter, self).__init__(source) + self.allowed_elements = allowed_elements + self.allowed_attributes = allowed_attributes + self.allowed_css_properties = allowed_css_properties + self.allowed_css_keywords = allowed_css_keywords + self.allowed_svg_properties = allowed_svg_properties + self.allowed_protocols = allowed_protocols + self.allowed_content_types = allowed_content_types + self.attr_val_is_uri = attr_val_is_uri + self.svg_attr_val_allows_ref = svg_attr_val_allows_ref + self.svg_allow_local_href = svg_allow_local_href + + def __iter__(self): + for token in base.Filter.__iter__(self): + token = self.sanitize_token(token) + if token: + yield token + + # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and + # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes + # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and + # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI + # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are + # allowed. + # + # sanitize_html('<script> do_nasty_stuff() </script>') + # => <script> do_nasty_stuff() </script> + # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>') + # => <a>Click here for $100</a> + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. + val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "</%s>" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): + return '' + + clean = [] + for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/whitespace.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/whitespace.py new file mode 100644 index 00000000..0d12584b --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/filters/whitespace.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals + +import re + +from . import base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) + + +class Filter(base.Filter): + """Collapses whitespace except in pre, textarea, and script elements""" + spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) + + def __iter__(self): + preserve = 0 + for token in base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag" \ + and (preserve or token["name"] in self.spacePreserveElements): + preserve += 1 + + elif type == "EndTag" and preserve: + preserve -= 1 + + elif not preserve and type == "SpaceCharacters" and token["data"]: + # Test on token["data"] above to not introduce spaces where there were not + token["data"] = " " + + elif not preserve and type == "Characters": + token["data"] = collapse_spaces(token["data"]) + + yield token + + +def collapse_spaces(text): + return SPACES_REGEX.sub(' ', text) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/html5parser.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/html5parser.py new file mode 100644 index 00000000..ae41a133 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/html5parser.py @@ -0,0 +1,2791 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import with_metaclass, viewkeys + +import types +from collections import OrderedDict + +from . import _inputstream +from . import _tokenizer + +from . import treebuilders +from .treebuilders.base import Marker + +from . import _utils +from .constants import ( + spaceCharacters, asciiUpper2Lower, + specialElements, headingElements, cdataElements, rcdataElements, + tokenTypes, tagTokenTypes, + namespaces, + htmlIntegrationPointElements, mathmlTextIntegrationPointElements, + adjustForeignAttributes as adjustForeignAttributesMap, + adjustMathMLAttributes, adjustSVGAttributes, + E, + _ReparseException +) + + +def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML document as a string or file-like object into a tree + + :arg doc: the document to parse as a string or file-like object + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parse + >>> parse('<html><body><p>This is a doc</p></body></html>') + <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0> + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parse(doc, **kwargs) + + +def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML fragment as a string or file-like object into a tree + + :arg doc: the fragment to parse as a string or file-like object + + :arg container: the container context to parse the fragment in + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import parseFragment + >>> parseFragment('<b>this is a fragment</b>') + <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090> + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parseFragment(doc, container=container, **kwargs) + + +def method_decorator_metaclass(function): + class Decorated(type): + def __new__(meta, classname, bases, classDict): + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): + attribute = function(attribute) + + classDict[attributeName] = attribute + return type.__new__(meta, classname, bases, classDict) + return Decorated + + +class HTMLParser(object): + """HTML parser + + Generates a tree structure from a stream of (possibly malformed) HTML. + + """ + + def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): + """ + :arg tree: a treebuilder class controlling the type of tree that will be + returned. Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + :arg strict: raise an exception when a parse error is encountered + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :arg debug: whether or not to enable debug mode which logs things + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() # generates parser with etree builder + >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict + + """ + + # Raise an exception on the first error encountered + self.strict = strict + + if tree is None: + tree = treebuilders.getTreeBuilder("etree") + self.tree = tree(namespaceHTMLElements) + self.errors = [] + + self.phases = dict([(name, cls(self, self.tree)) for name, cls in + getPhases(debug).items()]) + + def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): + + self.innerHTMLMode = innerHTML + self.container = container + self.scripting = scripting + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) + self.reset() + + try: + self.mainLoop() + except _ReparseException: + self.reset() + self.mainLoop() + + def reset(self): + self.tree.reset() + self.firstStartTag = False + self.errors = [] + self.log = [] # only used with debug mode + # "quirks" / "limited quirks" / "no quirks" + self.compatMode = "no quirks" + + if self.innerHTMLMode: + self.innerHTML = self.container.lower() + + if self.innerHTML in cdataElements: + self.tokenizer.state = self.tokenizer.rcdataState + elif self.innerHTML in rcdataElements: + self.tokenizer.state = self.tokenizer.rawtextState + elif self.innerHTML == 'plaintext': + self.tokenizer.state = self.tokenizer.plaintextState + else: + # state already is data state + # self.tokenizer.state = self.tokenizer.dataState + pass + self.phase = self.phases["beforeHtml"] + self.phase.insertHtmlElement() + self.resetInsertionMode() + else: + self.innerHTML = False # pylint:disable=redefined-variable-type + self.phase = self.phases["initial"] + + self.lastPhase = None + + self.beforeRCDataPhase = None + + self.framesetOK = True + + @property + def documentEncoding(self): + """Name of the character encoding that was used to decode the input stream, or + :obj:`None` if that is not determined yet + + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0].name + + def isHTMLIntegrationPoint(self, element): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): + return ("encoding" in element.attributes and + element.attributes["encoding"].translate( + asciiUpper2Lower) in + ("text/html", "application/xhtml+xml")) + else: + return (element.namespace, element.name) in htmlIntegrationPointElements + + def isMathMLTextIntegrationPoint(self, element): + return (element.namespace, element.name) in mathmlTextIntegrationPointElements + + def mainLoop(self): + CharactersToken = tokenTypes["Characters"] + SpaceCharactersToken = tokenTypes["SpaceCharacters"] + StartTagToken = tokenTypes["StartTag"] + EndTagToken = tokenTypes["EndTag"] + CommentToken = tokenTypes["Comment"] + DoctypeToken = tokenTypes["Doctype"] + ParseErrorToken = tokenTypes["ParseError"] + + for token in self.normalizedTokens(): + prev_token = None + new_token = token + while new_token is not None: + prev_token = new_token + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + currentNodeNamespace = currentNode.namespace if currentNode else None + currentNodeName = currentNode.name if currentNode else None + + type = new_token["type"] + + if type == ParseErrorToken: + self.parseError(new_token["data"], new_token.get("datavars", {})) + new_token = None + else: + if (len(self.tree.openElements) == 0 or + currentNodeNamespace == self.tree.defaultNamespace or + (self.isMathMLTextIntegrationPoint(currentNode) and + ((type == StartTagToken and + token["name"] not in frozenset(["mglyph", "malignmark"])) or + type in (CharactersToken, SpaceCharactersToken))) or + (currentNodeNamespace == namespaces["mathml"] and + currentNodeName == "annotation-xml" and + type == StartTagToken and + token["name"] == "svg") or + (self.isHTMLIntegrationPoint(currentNode) and + type in (StartTagToken, CharactersToken, SpaceCharactersToken))): + phase = self.phase + else: + phase = self.phases["inForeignContent"] + + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) + + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): + self.parseError("non-void-element-with-trailing-solidus", + {"name": prev_token["name"]}) + + # When the loop finishes it's EOF + reprocess = True + phases = [] + while reprocess: + phases.append(self.phase) + reprocess = self.phase.processEOF() + if reprocess: + assert self.phase not in phases + + def normalizedTokens(self): + for token in self.tokenizer: + yield self.normalizeToken(token) + + def parse(self, stream, *args, **kwargs): + """Parse a HTML document into a well-formed tree + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parse('<html><body><p>This is a doc</p></body></html>') + <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0> + + """ + self._parse(stream, False, None, *args, **kwargs) + return self.tree.getDocument() + + def parseFragment(self, stream, *args, **kwargs): + """Parse a HTML fragment into a well-formed tree fragment + + :arg container: name of the element we're setting the innerHTML + property if set to None, default to 'div' + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parseFragment('<b>this is a fragment</b>') + <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090> + + """ + self._parse(stream, True, *args, **kwargs) + return self.tree.getFragment() + + def parseError(self, errorcode="XXX-undefined-error", datavars=None): + # XXX The idea is to make errorcode mandatory. + if datavars is None: + datavars = {} + self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) + if self.strict: + raise ParseError(E[errorcode] % datavars) + + def normalizeToken(self, token): + # HTML5 specific normalizations to the token stream + if token["type"] == tokenTypes["StartTag"]: + raw = token["data"] + token["data"] = OrderedDict(raw) + if len(raw) > len(token["data"]): + # we had some duplicated attribute, fix so first wins + token["data"].update(raw[::-1]) + + return token + + def adjustMathMLAttributes(self, token): + adjust_attributes(token, adjustMathMLAttributes) + + def adjustSVGAttributes(self, token): + adjust_attributes(token, adjustSVGAttributes) + + def adjustForeignAttributes(self, token): + adjust_attributes(token, adjustForeignAttributesMap) + + def reparseTokenNormal(self, token): + # pylint:disable=unused-argument + self.parser.phase() + + def resetInsertionMode(self): + # The name of this method is mostly historical. (It's also used in the + # specification.) + last = False + newModes = { + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" + } + for node in self.tree.openElements[::-1]: + nodeName = node.name + new_phase = None + if node == self.tree.openElements[0]: + assert self.innerHTML + last = True + nodeName = self.innerHTML + # Check for conditions that should only happen in the innerHTML + # case + if nodeName in ("select", "colgroup", "head", "html"): + assert self.innerHTML + + if not last and node.namespace != self.tree.defaultNamespace: + continue + + if nodeName in newModes: + new_phase = self.phases[newModes[nodeName]] + break + elif last: + new_phase = self.phases["inBody"] + break + + self.phase = new_phase + + def parseRCDataRawtext(self, token, contentType): + # Generic RCDATA/RAWTEXT Parsing algorithm + assert contentType in ("RAWTEXT", "RCDATA") + + self.tree.insertElement(token) + + if contentType == "RAWTEXT": + self.tokenizer.state = self.tokenizer.rawtextState + else: + self.tokenizer.state = self.tokenizer.rcdataState + + self.originalPhase = self.phase + + self.phase = self.phases["text"] + + +@_utils.memoize +def getPhases(debug): + def log(function): + """Logger that records which phase processes each token""" + type_names = dict((value, key) for key, value in + tokenTypes.items()) + + def wrapped(self, *args, **kwargs): + if function.__name__.startswith("process") and len(args) > 0: + token = args[0] + try: + info = {"type": type_names[token['type']]} + except: + raise + if token['type'] in tagTokenTypes: + info["name"] = token['name'] + + self.parser.log.append((self.parser.tokenizer.state.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, + info)) + return function(self, *args, **kwargs) + else: + return function(self, *args, **kwargs) + return wrapped + + def getMetaclass(use_metaclass, metaclass_func): + if use_metaclass: + return method_decorator_metaclass(metaclass_func) + else: + return type + + # pylint:disable=unused-argument + class Phase(with_metaclass(getMetaclass(debug, log))): + """Base class for helper object that implements each phase of processing + """ + + def __init__(self, parser, tree): + self.parser = parser + self.tree = tree + + def processEOF(self): + raise NotImplementedError + + def processComment(self, token): + # For most phases the following is correct. Where it's not it will be + # overridden. + self.tree.insertComment(token, self.tree.openElements[-1]) + + def processDoctype(self, token): + self.parser.parseError("unexpected-doctype") + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processSpaceCharacters(self, token): + self.tree.insertText(token["data"]) + + def processStartTag(self, token): + return self.startTagHandler[token["name"]](token) + + def startTagHtml(self, token): + if not self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") + # XXX Need a check here to see if the first start tag token emitted is + # this token... If it's not, invoke self.parser.parseError(). + for attr, value in token["data"].items(): + if attr not in self.tree.openElements[0].attributes: + self.tree.openElements[0].attributes[attr] = value + self.parser.firstStartTag = False + + def processEndTag(self, token): + return self.endTagHandler[token["name"]](token) + + class InitialPhase(Phase): + def processSpaceCharacters(self, token): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + correct = token["correct"] + + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): + self.parser.parseError("unknown-doctype") + + if publicId is None: + publicId = "" + + self.tree.insertDoctype(token) + + if publicId != "": + publicId = publicId.translate(asciiUpper2Lower) + + if (not correct or token["name"] != "html" or + publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or + publicId in ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None or + systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + self.parser.compatMode = "quirks" + elif (publicId.startswith( + ("-//w3c//dtd xhtml 1.0 frameset//", + "-//w3c//dtd xhtml 1.0 transitional//")) or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): + self.parser.compatMode = "limited quirks" + + self.parser.phase = self.parser.phases["beforeHtml"] + + def anythingElse(self): + self.parser.compatMode = "quirks" + self.parser.phase = self.parser.phases["beforeHtml"] + + def processCharacters(self, token): + self.parser.parseError("expected-doctype-but-got-chars") + self.anythingElse() + return token + + def processStartTag(self, token): + self.parser.parseError("expected-doctype-but-got-start-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEndTag(self, token): + self.parser.parseError("expected-doctype-but-got-end-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEOF(self): + self.parser.parseError("expected-doctype-but-got-eof") + self.anythingElse() + return True + + class BeforeHtmlPhase(Phase): + # helper methods + def insertHtmlElement(self): + self.tree.insertRoot(impliedTagToken("html", "StartTag")) + self.parser.phase = self.parser.phases["beforeHead"] + + # other + def processEOF(self): + self.insertHtmlElement() + return True + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.insertHtmlElement() + return token + + def processStartTag(self, token): + if token["name"] == "html": + self.parser.firstStartTag = True + self.insertHtmlElement() + return token + + def processEndTag(self, token): + if token["name"] not in ("head", "body", "html", "br"): + self.parser.parseError("unexpected-end-tag-before-html", + {"name": token["name"]}) + else: + self.insertHtmlElement() + return token + + class BeforeHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + (("head", "body", "html", "br"), self.endTagImplyHead) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.startTagHead(impliedTagToken("head", "StartTag")) + return True + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.tree.insertElement(token) + self.tree.headPointer = self.tree.openElements[-1] + self.parser.phase = self.parser.phases["inHead"] + + def startTagOther(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagImplyHead(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagOther(self, token): + self.parser.parseError("end-tag-after-implied-root", + {"name": token["name"]}) + + class InHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("title", self.startTagTitle), + (("noframes", "style"), self.startTagNoFramesStyle), + ("noscript", self.startTagNoscript), + ("script", self.startTagScript), + (("base", "basefont", "bgsound", "command", "link"), + self.startTagBaseLinkCommand), + ("meta", self.startTagMeta), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("head", self.endTagHead), + (("br", "html", "body"), self.endTagHtmlBodyBr) + ]) + self.endTagHandler.default = self.endTagOther + + # the real thing + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.parser.parseError("two-heads-are-not-better-than-one") + + def startTagBaseLinkCommand(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMeta(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + attributes = token["data"] + if self.parser.tokenizer.stream.charEncoding[1] == "tentative": + if "charset" in attributes: + self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): + # Encoding it as UTF-8 here is a hack, as really we should pass + # the abstract Unicode string, and just use the + # ContentAttrParser on that, but using UTF-8 allows all chars + # to be encoded and as a ASCII-superset works. + data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) + codec = parser.parse() + self.parser.tokenizer.stream.changeEncoding(codec) + + def startTagTitle(self, token): + self.parser.parseRCDataRawtext(token, "RCDATA") + + def startTagNoFramesStyle(self, token): + # Need to decide whether to implement the scripting-disabled case + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagNoscript(self, token): + if self.parser.scripting: + self.parser.parseRCDataRawtext(token, "RAWTEXT") + else: + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inHeadNoscript"] + + def startTagScript(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState + self.parser.originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["text"] + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHead(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "head", "Expected head got %s" % node.name + self.parser.phase = self.parser.phases["afterHead"] + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.endTagHead(impliedTagToken("head")) + + class InHeadNoscriptPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), + (("head", "noscript"), self.startTagHeadNoscript), + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("noscript", self.endTagNoscript), + ("br", self.endTagBr), + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.parser.parseError("eof-in-head-noscript") + self.anythingElse() + return True + + def processComment(self, token): + return self.parser.phases["inHead"].processComment(token) + + def processCharacters(self, token): + self.parser.parseError("char-in-head-noscript") + self.anythingElse() + return token + + def processSpaceCharacters(self, token): + return self.parser.phases["inHead"].processSpaceCharacters(token) + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBaseLinkCommand(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagHeadNoscript(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagNoscript(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "noscript", "Expected noscript got %s" % node.name + self.parser.phase = self.parser.phases["inHead"] + + def endTagBr(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + # Caller must raise parse error first! + self.endTagNoscript(impliedTagToken("noscript")) + + class AfterHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("body", self.startTagBody), + ("frameset", self.startTagFrameset), + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + "style", "title"), + self.startTagFromHead), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBody(self, token): + self.parser.framesetOK = False + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inBody"] + + def startTagFrameset(self, token): + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagFromHead(self, token): + self.parser.parseError("unexpected-start-tag-out-of-my-head", + {"name": token["name"]}) + self.tree.openElements.append(self.tree.headPointer) + self.parser.phases["inHead"].processStartTag(token) + for node in self.tree.openElements[::-1]: + if node.name == "head": + self.tree.openElements.remove(node) + break + + def startTagHead(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.tree.insertElement(impliedTagToken("body", "StartTag")) + self.parser.phase = self.parser.phases["inBody"] + self.parser.framesetOK = True + + class InBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody + # the really-really-really-very crazy mode + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + # Set this to the default handler + self.processSpaceCharacters = self.processSpaceCharactersNonPre + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("base", "basefont", "bgsound", "command", "link", "meta", + "script", "style", "title"), + self.startTagProcessInHead), + ("body", self.startTagBody), + ("frameset", self.startTagFrameset), + (("address", "article", "aside", "blockquote", "center", "details", + "dir", "div", "dl", "fieldset", "figcaption", "figure", + "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", + "section", "summary", "ul"), + self.startTagCloseP), + (headingElements, self.startTagHeading), + (("pre", "listing"), self.startTagPreListing), + ("form", self.startTagForm), + (("li", "dd", "dt"), self.startTagListItem), + ("plaintext", self.startTagPlaintext), + ("a", self.startTagA), + (("b", "big", "code", "em", "font", "i", "s", "small", "strike", + "strong", "tt", "u"), self.startTagFormatting), + ("nobr", self.startTagNobr), + ("button", self.startTagButton), + (("applet", "marquee", "object"), self.startTagAppletMarqueeObject), + ("xmp", self.startTagXmp), + ("table", self.startTagTable), + (("area", "br", "embed", "img", "keygen", "wbr"), + self.startTagVoidFormatting), + (("param", "source", "track"), self.startTagParamSource), + ("input", self.startTagInput), + ("hr", self.startTagHr), + ("image", self.startTagImage), + ("isindex", self.startTagIsIndex), + ("textarea", self.startTagTextarea), + ("iframe", self.startTagIFrame), + ("noscript", self.startTagNoscript), + (("noembed", "noframes"), self.startTagRawtext), + ("select", self.startTagSelect), + (("rp", "rt"), self.startTagRpRt), + (("option", "optgroup"), self.startTagOpt), + (("math"), self.startTagMath), + (("svg"), self.startTagSvg), + (("caption", "col", "colgroup", "frame", "head", + "tbody", "td", "tfoot", "th", "thead", + "tr"), self.startTagMisplaced) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("body", self.endTagBody), + ("html", self.endTagHtml), + (("address", "article", "aside", "blockquote", "button", "center", + "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", + "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre", + "section", "summary", "ul"), self.endTagBlock), + ("form", self.endTagForm), + ("p", self.endTagP), + (("dd", "dt", "li"), self.endTagListItem), + (headingElements, self.endTagHeading), + (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", + "strike", "strong", "tt", "u"), self.endTagFormatting), + (("applet", "marquee", "object"), self.endTagAppletMarqueeObject), + ("br", self.endTagBr), + ]) + self.endTagHandler.default = self.endTagOther + + def isMatchingFormattingElement(self, node1, node2): + return (node1.name == node2.name and + node1.namespace == node2.namespace and + node1.attributes == node2.attributes) + + # helper + def addFormattingElement(self, token): + self.tree.insertElement(token) + element = self.tree.openElements[-1] + + matchingElements = [] + for node in self.tree.activeFormattingElements[::-1]: + if node is Marker: + break + elif self.isMatchingFormattingElement(node, element): + matchingElements.append(node) + + assert len(matchingElements) <= 3 + if len(matchingElements) == 3: + self.tree.activeFormattingElements.remove(matchingElements[-1]) + self.tree.activeFormattingElements.append(element) + + # the real deal + def processEOF(self): + allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", + "tfoot", "th", "thead", "tr", "body", + "html")) + for node in self.tree.openElements[::-1]: + if node.name not in allowed_elements: + self.parser.parseError("expected-closing-tag-but-got-eof") + break + # Stop parsing + + def processSpaceCharactersDropNewline(self, token): + # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we + # want to drop leading newlines + data = token["data"] + self.processSpaceCharacters = self.processSpaceCharactersNonPre + if (data.startswith("\n") and + self.tree.openElements[-1].name in ("pre", "listing", "textarea") and + not self.tree.openElements[-1].hasContent()): + data = data[1:] + if data: + self.tree.reconstructActiveFormattingElements() + self.tree.insertText(data) + + def processCharacters(self, token): + if token["data"] == "\u0000": + # The tokenizer should always emit null on its own + return + self.tree.reconstructActiveFormattingElements() + self.tree.insertText(token["data"]) + # This must be bad for performance + if (self.parser.framesetOK and + any([char not in spaceCharacters + for char in token["data"]])): + self.parser.framesetOK = False + + def processSpaceCharactersNonPre(self, token): + self.tree.reconstructActiveFormattingElements() + self.tree.insertText(token["data"]) + + def startTagProcessInHead(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagBody(self, token): + self.parser.parseError("unexpected-start-tag", {"name": "body"}) + if (len(self.tree.openElements) == 1 or + self.tree.openElements[1].name != "body"): + assert self.parser.innerHTML + else: + self.parser.framesetOK = False + for attr, value in token["data"].items(): + if attr not in self.tree.openElements[1].attributes: + self.tree.openElements[1].attributes[attr] = value + + def startTagFrameset(self, token): + self.parser.parseError("unexpected-start-tag", {"name": "frameset"}) + if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"): + assert self.parser.innerHTML + elif not self.parser.framesetOK: + pass + else: + if self.tree.openElements[1].parent: + self.tree.openElements[1].parent.removeChild(self.tree.openElements[1]) + while self.tree.openElements[-1].name != "html": + self.tree.openElements.pop() + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagCloseP(self, token): + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + self.tree.insertElement(token) + + def startTagPreListing(self, token): + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + self.tree.insertElement(token) + self.parser.framesetOK = False + self.processSpaceCharacters = self.processSpaceCharactersDropNewline + + def startTagForm(self, token): + if self.tree.formPointer: + self.parser.parseError("unexpected-start-tag", {"name": "form"}) + else: + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + self.tree.insertElement(token) + self.tree.formPointer = self.tree.openElements[-1] + + def startTagListItem(self, token): + self.parser.framesetOK = False + + stopNamesMap = {"li": ["li"], + "dt": ["dt", "dd"], + "dd": ["dt", "dd"]} + stopNames = stopNamesMap[token["name"]] + for node in reversed(self.tree.openElements): + if node.name in stopNames: + self.parser.phase.processEndTag( + impliedTagToken(node.name, "EndTag")) + break + if (node.nameTuple in specialElements and + node.name not in ("address", "div", "p")): + break + + if self.tree.elementInScope("p", variant="button"): + self.parser.phase.processEndTag( + impliedTagToken("p", "EndTag")) + + self.tree.insertElement(token) + + def startTagPlaintext(self, token): + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.plaintextState + + def startTagHeading(self, token): + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + if self.tree.openElements[-1].name in headingElements: + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + self.tree.openElements.pop() + self.tree.insertElement(token) + + def startTagA(self, token): + afeAElement = self.tree.elementInActiveFormattingElements("a") + if afeAElement: + self.parser.parseError("unexpected-start-tag-implies-end-tag", + {"startName": "a", "endName": "a"}) + self.endTagFormatting(impliedTagToken("a")) + if afeAElement in self.tree.openElements: + self.tree.openElements.remove(afeAElement) + if afeAElement in self.tree.activeFormattingElements: + self.tree.activeFormattingElements.remove(afeAElement) + self.tree.reconstructActiveFormattingElements() + self.addFormattingElement(token) + + def startTagFormatting(self, token): + self.tree.reconstructActiveFormattingElements() + self.addFormattingElement(token) + + def startTagNobr(self, token): + self.tree.reconstructActiveFormattingElements() + if self.tree.elementInScope("nobr"): + self.parser.parseError("unexpected-start-tag-implies-end-tag", + {"startName": "nobr", "endName": "nobr"}) + self.processEndTag(impliedTagToken("nobr")) + # XXX Need tests that trigger the following + self.tree.reconstructActiveFormattingElements() + self.addFormattingElement(token) + + def startTagButton(self, token): + if self.tree.elementInScope("button"): + self.parser.parseError("unexpected-start-tag-implies-end-tag", + {"startName": "button", "endName": "button"}) + self.processEndTag(impliedTagToken("button")) + return token + else: + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + self.parser.framesetOK = False + + def startTagAppletMarqueeObject(self, token): + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + self.tree.activeFormattingElements.append(Marker) + self.parser.framesetOK = False + + def startTagXmp(self, token): + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + self.tree.reconstructActiveFormattingElements() + self.parser.framesetOK = False + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagTable(self, token): + if self.parser.compatMode != "quirks": + if self.tree.elementInScope("p", variant="button"): + self.processEndTag(impliedTagToken("p")) + self.tree.insertElement(token) + self.parser.framesetOK = False + self.parser.phase = self.parser.phases["inTable"] + + def startTagVoidFormatting(self, token): + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + self.parser.framesetOK = False + + def startTagInput(self, token): + framesetOK = self.parser.framesetOK + self.startTagVoidFormatting(token) + if ("type" in token["data"] and + token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): + # input type=hidden doesn't change framesetOK + self.parser.framesetOK = framesetOK + + def startTagParamSource(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagHr(self, token): + if self.tree.elementInScope("p", variant="button"): + self.endTagP(impliedTagToken("p")) + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + self.parser.framesetOK = False + + def startTagImage(self, token): + # No really... + self.parser.parseError("unexpected-start-tag-treated-as", + {"originalName": "image", "newName": "img"}) + self.processStartTag(impliedTagToken("img", "StartTag", + attributes=token["data"], + selfClosing=token["selfClosing"])) + + def startTagIsIndex(self, token): + self.parser.parseError("deprecated-tag", {"name": "isindex"}) + if self.tree.formPointer: + return + form_attrs = {} + if "action" in token["data"]: + form_attrs["action"] = token["data"]["action"] + self.processStartTag(impliedTagToken("form", "StartTag", + attributes=form_attrs)) + self.processStartTag(impliedTagToken("hr", "StartTag")) + self.processStartTag(impliedTagToken("label", "StartTag")) + # XXX Localization ... + if "prompt" in token["data"]: + prompt = token["data"]["prompt"] + else: + prompt = "This is a searchable index. Enter search keywords: " + self.processCharacters( + {"type": tokenTypes["Characters"], "data": prompt}) + attributes = token["data"].copy() + if "action" in attributes: + del attributes["action"] + if "prompt" in attributes: + del attributes["prompt"] + attributes["name"] = "isindex" + self.processStartTag(impliedTagToken("input", "StartTag", + attributes=attributes, + selfClosing=token["selfClosing"])) + self.processEndTag(impliedTagToken("label")) + self.processStartTag(impliedTagToken("hr", "StartTag")) + self.processEndTag(impliedTagToken("form")) + + def startTagTextarea(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.rcdataState + self.processSpaceCharacters = self.processSpaceCharactersDropNewline + self.parser.framesetOK = False + + def startTagIFrame(self, token): + self.parser.framesetOK = False + self.startTagRawtext(token) + + def startTagNoscript(self, token): + if self.parser.scripting: + self.startTagRawtext(token) + else: + self.startTagOther(token) + + def startTagRawtext(self, token): + """iframe, noembed noframes, noscript(if scripting enabled)""" + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagOpt(self, token): + if self.tree.openElements[-1].name == "option": + self.parser.phase.processEndTag(impliedTagToken("option")) + self.tree.reconstructActiveFormattingElements() + self.parser.tree.insertElement(token) + + def startTagSelect(self, token): + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + self.parser.framesetOK = False + if self.parser.phase in (self.parser.phases["inTable"], + self.parser.phases["inCaption"], + self.parser.phases["inColumnGroup"], + self.parser.phases["inTableBody"], + self.parser.phases["inRow"], + self.parser.phases["inCell"]): + self.parser.phase = self.parser.phases["inSelectInTable"] + else: + self.parser.phase = self.parser.phases["inSelect"] + + def startTagRpRt(self, token): + if self.tree.elementInScope("ruby"): + self.tree.generateImpliedEndTags() + if self.tree.openElements[-1].name != "ruby": + self.parser.parseError() + self.tree.insertElement(token) + + def startTagMath(self, token): + self.tree.reconstructActiveFormattingElements() + self.parser.adjustMathMLAttributes(token) + self.parser.adjustForeignAttributes(token) + token["namespace"] = namespaces["mathml"] + self.tree.insertElement(token) + # Need to get the parse error right for the case where the token + # has a namespace not equal to the xmlns attribute + if token["selfClosing"]: + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagSvg(self, token): + self.tree.reconstructActiveFormattingElements() + self.parser.adjustSVGAttributes(token) + self.parser.adjustForeignAttributes(token) + token["namespace"] = namespaces["svg"] + self.tree.insertElement(token) + # Need to get the parse error right for the case where the token + # has a namespace not equal to the xmlns attribute + if token["selfClosing"]: + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMisplaced(self, token): + """ Elements that should be children of other elements that have a + different insertion mode; here they are ignored + "caption", "col", "colgroup", "frame", "frameset", "head", + "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", + "tr", "noscript" + """ + self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]}) + + def startTagOther(self, token): + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + + def endTagP(self, token): + if not self.tree.elementInScope("p", variant="button"): + self.startTagCloseP(impliedTagToken("p", "StartTag")) + self.parser.parseError("unexpected-end-tag", {"name": "p"}) + self.endTagP(impliedTagToken("p", "EndTag")) + else: + self.tree.generateImpliedEndTags("p") + if self.tree.openElements[-1].name != "p": + self.parser.parseError("unexpected-end-tag", {"name": "p"}) + node = self.tree.openElements.pop() + while node.name != "p": + node = self.tree.openElements.pop() + + def endTagBody(self, token): + if not self.tree.elementInScope("body"): + self.parser.parseError() + return + elif self.tree.openElements[-1].name != "body": + for node in self.tree.openElements[2:]: + if node.name not in frozenset(("dd", "dt", "li", "optgroup", + "option", "p", "rp", "rt", + "tbody", "td", "tfoot", + "th", "thead", "tr", "body", + "html")): + # Not sure this is the correct name for the parse error + self.parser.parseError( + "expected-one-end-tag-but-got-another", + {"gotName": "body", "expectedName": node.name}) + break + self.parser.phase = self.parser.phases["afterBody"] + + def endTagHtml(self, token): + # We repeat the test for the body end tag token being ignored here + if self.tree.elementInScope("body"): + self.endTagBody(impliedTagToken("body")) + return token + + def endTagBlock(self, token): + # Put us back in the right whitespace handling mode + if token["name"] == "pre": + self.processSpaceCharacters = self.processSpaceCharactersNonPre + inScope = self.tree.elementInScope(token["name"]) + if inScope: + self.tree.generateImpliedEndTags() + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("end-tag-too-early", {"name": token["name"]}) + if inScope: + node = self.tree.openElements.pop() + while node.name != token["name"]: + node = self.tree.openElements.pop() + + def endTagForm(self, token): + node = self.tree.formPointer + self.tree.formPointer = None + if node is None or not self.tree.elementInScope(node): + self.parser.parseError("unexpected-end-tag", + {"name": "form"}) + else: + self.tree.generateImpliedEndTags() + if self.tree.openElements[-1] != node: + self.parser.parseError("end-tag-too-early-ignored", + {"name": "form"}) + self.tree.openElements.remove(node) + + def endTagListItem(self, token): + if token["name"] == "li": + variant = "list" + else: + variant = None + if not self.tree.elementInScope(token["name"], variant=variant): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + else: + self.tree.generateImpliedEndTags(exclude=token["name"]) + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError( + "end-tag-too-early", + {"name": token["name"]}) + node = self.tree.openElements.pop() + while node.name != token["name"]: + node = self.tree.openElements.pop() + + def endTagHeading(self, token): + for item in headingElements: + if self.tree.elementInScope(item): + self.tree.generateImpliedEndTags() + break + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("end-tag-too-early", {"name": token["name"]}) + + for item in headingElements: + if self.tree.elementInScope(item): + item = self.tree.openElements.pop() + while item.name not in headingElements: + item = self.tree.openElements.pop() + break + + def endTagFormatting(self, token): + """The much-feared adoption agency algorithm""" + # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867 + # XXX Better parseError messages appreciated. + + # Step 1 + outerLoopCounter = 0 + + # Step 2 + while outerLoopCounter < 8: + + # Step 3 + outerLoopCounter += 1 + + # Step 4: + + # Let the formatting element be the last element in + # the list of active formatting elements that: + # - is between the end of the list and the last scope + # marker in the list, if any, or the start of the list + # otherwise, and + # - has the same tag name as the token. + formattingElement = self.tree.elementInActiveFormattingElements( + token["name"]) + if (not formattingElement or + (formattingElement in self.tree.openElements and + not self.tree.elementInScope(formattingElement.name))): + # If there is no such node, then abort these steps + # and instead act as described in the "any other + # end tag" entry below. + self.endTagOther(token) + return + + # Otherwise, if there is such a node, but that node is + # not in the stack of open elements, then this is a + # parse error; remove the element from the list, and + # abort these steps. + elif formattingElement not in self.tree.openElements: + self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) + self.tree.activeFormattingElements.remove(formattingElement) + return + + # Otherwise, if there is such a node, and that node is + # also in the stack of open elements, but the element + # is not in scope, then this is a parse error; ignore + # the token, and abort these steps. + elif not self.tree.elementInScope(formattingElement.name): + self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) + return + + # Otherwise, there is a formatting element and that + # element is in the stack and is in scope. If the + # element is not the current node, this is a parse + # error. In any case, proceed with the algorithm as + # written in the following steps. + else: + if formattingElement != self.tree.openElements[-1]: + self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) + + # Step 5: + + # Let the furthest block be the topmost node in the + # stack of open elements that is lower in the stack + # than the formatting element, and is an element in + # the special category. There might not be one. + afeIndex = self.tree.openElements.index(formattingElement) + furthestBlock = None + for element in self.tree.openElements[afeIndex:]: + if element.nameTuple in specialElements: + furthestBlock = element + break + + # Step 6: + + # If there is no furthest block, then the UA must + # first pop all the nodes from the bottom of the stack + # of open elements, from the current node up to and + # including the formatting element, then remove the + # formatting element from the list of active + # formatting elements, and finally abort these steps. + if furthestBlock is None: + element = self.tree.openElements.pop() + while element != formattingElement: + element = self.tree.openElements.pop() + self.tree.activeFormattingElements.remove(element) + return + + # Step 7 + commonAncestor = self.tree.openElements[afeIndex - 1] + + # Step 8: + # The bookmark is supposed to help us identify where to reinsert + # nodes in step 15. We have to ensure that we reinsert nodes after + # the node before the active formatting element. Note the bookmark + # can move in step 9.7 + bookmark = self.tree.activeFormattingElements.index(formattingElement) + + # Step 9 + lastNode = node = furthestBlock + innerLoopCounter = 0 + + index = self.tree.openElements.index(node) + while innerLoopCounter < 3: + innerLoopCounter += 1 + # Node is element before node in open elements + index -= 1 + node = self.tree.openElements[index] + if node not in self.tree.activeFormattingElements: + self.tree.openElements.remove(node) + continue + # Step 9.6 + if node == formattingElement: + break + # Step 9.7 + if lastNode == furthestBlock: + bookmark = self.tree.activeFormattingElements.index(node) + 1 + # Step 9.8 + clone = node.cloneNode() + # Replace node with clone + self.tree.activeFormattingElements[ + self.tree.activeFormattingElements.index(node)] = clone + self.tree.openElements[ + self.tree.openElements.index(node)] = clone + node = clone + # Step 9.9 + # Remove lastNode from its parents, if any + if lastNode.parent: + lastNode.parent.removeChild(lastNode) + node.appendChild(lastNode) + # Step 9.10 + lastNode = node + + # Step 10 + # Foster parent lastNode if commonAncestor is a + # table, tbody, tfoot, thead, or tr we need to foster + # parent the lastNode + if lastNode.parent: + lastNode.parent.removeChild(lastNode) + + if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")): + parent, insertBefore = self.tree.getTableMisnestedNodePosition() + parent.insertBefore(lastNode, insertBefore) + else: + commonAncestor.appendChild(lastNode) + + # Step 11 + clone = formattingElement.cloneNode() + + # Step 12 + furthestBlock.reparentChildren(clone) + + # Step 13 + furthestBlock.appendChild(clone) + + # Step 14 + self.tree.activeFormattingElements.remove(formattingElement) + self.tree.activeFormattingElements.insert(bookmark, clone) + + # Step 15 + self.tree.openElements.remove(formattingElement) + self.tree.openElements.insert( + self.tree.openElements.index(furthestBlock) + 1, clone) + + def endTagAppletMarqueeObject(self, token): + if self.tree.elementInScope(token["name"]): + self.tree.generateImpliedEndTags() + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("end-tag-too-early", {"name": token["name"]}) + + if self.tree.elementInScope(token["name"]): + element = self.tree.openElements.pop() + while element.name != token["name"]: + element = self.tree.openElements.pop() + self.tree.clearActiveFormattingElements() + + def endTagBr(self, token): + self.parser.parseError("unexpected-end-tag-treated-as", + {"originalName": "br", "newName": "br element"}) + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(impliedTagToken("br", "StartTag")) + self.tree.openElements.pop() + + def endTagOther(self, token): + for node in self.tree.openElements[::-1]: + if node.name == token["name"]: + self.tree.generateImpliedEndTags(exclude=token["name"]) + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + while self.tree.openElements.pop() != node: + pass + break + else: + if node.nameTuple in specialElements: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + break + + class TextPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + self.startTagHandler = _utils.MethodDispatcher([]) + self.startTagHandler.default = self.startTagOther + self.endTagHandler = _utils.MethodDispatcher([ + ("script", self.endTagScript)]) + self.endTagHandler.default = self.endTagOther + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processEOF(self): + self.parser.parseError("expected-named-closing-tag-but-got-eof", + {"name": self.tree.openElements[-1].name}) + self.tree.openElements.pop() + self.parser.phase = self.parser.originalPhase + return True + + def startTagOther(self, token): + assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name'] + + def endTagScript(self, token): + node = self.tree.openElements.pop() + assert node.name == "script" + self.parser.phase = self.parser.originalPhase + # The rest of this method is all stuff that only happens if + # document.write works + + def endTagOther(self, token): + self.tree.openElements.pop() + self.parser.phase = self.parser.originalPhase + + class InTablePhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-table + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("caption", self.startTagCaption), + ("colgroup", self.startTagColgroup), + ("col", self.startTagCol), + (("tbody", "tfoot", "thead"), self.startTagRowGroup), + (("td", "th", "tr"), self.startTagImplyTbody), + ("table", self.startTagTable), + (("style", "script"), self.startTagStyleScript), + ("input", self.startTagInput), + ("form", self.startTagForm) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("table", self.endTagTable), + (("body", "caption", "col", "colgroup", "html", "tbody", "td", + "tfoot", "th", "thead", "tr"), self.endTagIgnore) + ]) + self.endTagHandler.default = self.endTagOther + + # helper methods + def clearStackToTableContext(self): + # "clear the stack back to a table context" + while self.tree.openElements[-1].name not in ("table", "html"): + # self.parser.parseError("unexpected-implied-end-tag-in-table", + # {"name": self.tree.openElements[-1].name}) + self.tree.openElements.pop() + # When the current node is <html> it's an innerHTML case + + # processing methods + def processEOF(self): + if self.tree.openElements[-1].name != "html": + self.parser.parseError("eof-in-table") + else: + assert self.parser.innerHTML + # Stop parsing + + def processSpaceCharacters(self, token): + originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["inTableText"] + self.parser.phase.originalPhase = originalPhase + self.parser.phase.processSpaceCharacters(token) + + def processCharacters(self, token): + originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["inTableText"] + self.parser.phase.originalPhase = originalPhase + self.parser.phase.processCharacters(token) + + def insertText(self, token): + # If we get here there must be at least one non-whitespace character + # Do the table magic! + self.tree.insertFromTable = True + self.parser.phases["inBody"].processCharacters(token) + self.tree.insertFromTable = False + + def startTagCaption(self, token): + self.clearStackToTableContext() + self.tree.activeFormattingElements.append(Marker) + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inCaption"] + + def startTagColgroup(self, token): + self.clearStackToTableContext() + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inColumnGroup"] + + def startTagCol(self, token): + self.startTagColgroup(impliedTagToken("colgroup", "StartTag")) + return token + + def startTagRowGroup(self, token): + self.clearStackToTableContext() + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inTableBody"] + + def startTagImplyTbody(self, token): + self.startTagRowGroup(impliedTagToken("tbody", "StartTag")) + return token + + def startTagTable(self, token): + self.parser.parseError("unexpected-start-tag-implies-end-tag", + {"startName": "table", "endName": "table"}) + self.parser.phase.processEndTag(impliedTagToken("table")) + if not self.parser.innerHTML: + return token + + def startTagStyleScript(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagInput(self, token): + if ("type" in token["data"] and + token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): + self.parser.parseError("unexpected-hidden-input-in-table") + self.tree.insertElement(token) + # XXX associate with form + self.tree.openElements.pop() + else: + self.startTagOther(token) + + def startTagForm(self, token): + self.parser.parseError("unexpected-form-in-table") + if self.tree.formPointer is None: + self.tree.insertElement(token) + self.tree.formPointer = self.tree.openElements[-1] + self.tree.openElements.pop() + + def startTagOther(self, token): + self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]}) + # Do the table magic! + self.tree.insertFromTable = True + self.parser.phases["inBody"].processStartTag(token) + self.tree.insertFromTable = False + + def endTagTable(self, token): + if self.tree.elementInScope("table", variant="table"): + self.tree.generateImpliedEndTags() + if self.tree.openElements[-1].name != "table": + self.parser.parseError("end-tag-too-early-named", + {"gotName": "table", + "expectedName": self.tree.openElements[-1].name}) + while self.tree.openElements[-1].name != "table": + self.tree.openElements.pop() + self.tree.openElements.pop() + self.parser.resetInsertionMode() + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def endTagIgnore(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]}) + # Do the table magic! + self.tree.insertFromTable = True + self.parser.phases["inBody"].processEndTag(token) + self.tree.insertFromTable = False + + class InTableTextPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + self.originalPhase = None + self.characterTokens = [] + + def flushCharacters(self): + data = "".join([item["data"] for item in self.characterTokens]) + if any([item not in spaceCharacters for item in data]): + token = {"type": tokenTypes["Characters"], "data": data} + self.parser.phases["inTable"].insertText(token) + elif data: + self.tree.insertText(data) + self.characterTokens = [] + + def processComment(self, token): + self.flushCharacters() + self.parser.phase = self.originalPhase + return token + + def processEOF(self): + self.flushCharacters() + self.parser.phase = self.originalPhase + return True + + def processCharacters(self, token): + if token["data"] == "\u0000": + return + self.characterTokens.append(token) + + def processSpaceCharacters(self, token): + # pretty sure we should never reach here + self.characterTokens.append(token) + # assert False + + def processStartTag(self, token): + self.flushCharacters() + self.parser.phase = self.originalPhase + return token + + def processEndTag(self, token): + self.flushCharacters() + self.parser.phase = self.originalPhase + return token + + class InCaptionPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-caption + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", + "thead", "tr"), self.startTagTableElement) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("caption", self.endTagCaption), + ("table", self.endTagTable), + (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", + "thead", "tr"), self.endTagIgnore) + ]) + self.endTagHandler.default = self.endTagOther + + def ignoreEndTagCaption(self): + return not self.tree.elementInScope("caption", variant="table") + + def processEOF(self): + self.parser.phases["inBody"].processEOF() + + def processCharacters(self, token): + return self.parser.phases["inBody"].processCharacters(token) + + def startTagTableElement(self, token): + self.parser.parseError() + # XXX Have to duplicate logic here to find out if the tag is ignored + ignoreEndTag = self.ignoreEndTagCaption() + self.parser.phase.processEndTag(impliedTagToken("caption")) + if not ignoreEndTag: + return token + + def startTagOther(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def endTagCaption(self, token): + if not self.ignoreEndTagCaption(): + # AT this code is quite similar to endTagTable in "InTable" + self.tree.generateImpliedEndTags() + if self.tree.openElements[-1].name != "caption": + self.parser.parseError("expected-one-end-tag-but-got-another", + {"gotName": "caption", + "expectedName": self.tree.openElements[-1].name}) + while self.tree.openElements[-1].name != "caption": + self.tree.openElements.pop() + self.tree.openElements.pop() + self.tree.clearActiveFormattingElements() + self.parser.phase = self.parser.phases["inTable"] + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def endTagTable(self, token): + self.parser.parseError() + ignoreEndTag = self.ignoreEndTagCaption() + self.parser.phase.processEndTag(impliedTagToken("caption")) + if not ignoreEndTag: + return token + + def endTagIgnore(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def endTagOther(self, token): + return self.parser.phases["inBody"].processEndTag(token) + + class InColumnGroupPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-column + + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("col", self.startTagCol) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("colgroup", self.endTagColgroup), + ("col", self.endTagCol) + ]) + self.endTagHandler.default = self.endTagOther + + def ignoreEndTagColgroup(self): + return self.tree.openElements[-1].name == "html" + + def processEOF(self): + if self.tree.openElements[-1].name == "html": + assert self.parser.innerHTML + return + else: + ignoreEndTag = self.ignoreEndTagColgroup() + self.endTagColgroup(impliedTagToken("colgroup")) + if not ignoreEndTag: + return True + + def processCharacters(self, token): + ignoreEndTag = self.ignoreEndTagColgroup() + self.endTagColgroup(impliedTagToken("colgroup")) + if not ignoreEndTag: + return token + + def startTagCol(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagOther(self, token): + ignoreEndTag = self.ignoreEndTagColgroup() + self.endTagColgroup(impliedTagToken("colgroup")) + if not ignoreEndTag: + return token + + def endTagColgroup(self, token): + if self.ignoreEndTagColgroup(): + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + else: + self.tree.openElements.pop() + self.parser.phase = self.parser.phases["inTable"] + + def endTagCol(self, token): + self.parser.parseError("no-end-tag", {"name": "col"}) + + def endTagOther(self, token): + ignoreEndTag = self.ignoreEndTagColgroup() + self.endTagColgroup(impliedTagToken("colgroup")) + if not ignoreEndTag: + return token + + class InTableBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-table0 + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("tr", self.startTagTr), + (("td", "th"), self.startTagTableCell), + (("caption", "col", "colgroup", "tbody", "tfoot", "thead"), + self.startTagTableOther) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), + ("table", self.endTagTable), + (("body", "caption", "col", "colgroup", "html", "td", "th", + "tr"), self.endTagIgnore) + ]) + self.endTagHandler.default = self.endTagOther + + # helper methods + def clearStackToTableBodyContext(self): + while self.tree.openElements[-1].name not in ("tbody", "tfoot", + "thead", "html"): + # self.parser.parseError("unexpected-implied-end-tag-in-table", + # {"name": self.tree.openElements[-1].name}) + self.tree.openElements.pop() + if self.tree.openElements[-1].name == "html": + assert self.parser.innerHTML + + # the rest + def processEOF(self): + self.parser.phases["inTable"].processEOF() + + def processSpaceCharacters(self, token): + return self.parser.phases["inTable"].processSpaceCharacters(token) + + def processCharacters(self, token): + return self.parser.phases["inTable"].processCharacters(token) + + def startTagTr(self, token): + self.clearStackToTableBodyContext() + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inRow"] + + def startTagTableCell(self, token): + self.parser.parseError("unexpected-cell-in-table-body", + {"name": token["name"]}) + self.startTagTr(impliedTagToken("tr", "StartTag")) + return token + + def startTagTableOther(self, token): + # XXX AT Any ideas on how to share this with endTagTable? + if (self.tree.elementInScope("tbody", variant="table") or + self.tree.elementInScope("thead", variant="table") or + self.tree.elementInScope("tfoot", variant="table")): + self.clearStackToTableBodyContext() + self.endTagTableRowGroup( + impliedTagToken(self.tree.openElements[-1].name)) + return token + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def startTagOther(self, token): + return self.parser.phases["inTable"].processStartTag(token) + + def endTagTableRowGroup(self, token): + if self.tree.elementInScope(token["name"], variant="table"): + self.clearStackToTableBodyContext() + self.tree.openElements.pop() + self.parser.phase = self.parser.phases["inTable"] + else: + self.parser.parseError("unexpected-end-tag-in-table-body", + {"name": token["name"]}) + + def endTagTable(self, token): + if (self.tree.elementInScope("tbody", variant="table") or + self.tree.elementInScope("thead", variant="table") or + self.tree.elementInScope("tfoot", variant="table")): + self.clearStackToTableBodyContext() + self.endTagTableRowGroup( + impliedTagToken(self.tree.openElements[-1].name)) + return token + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def endTagIgnore(self, token): + self.parser.parseError("unexpected-end-tag-in-table-body", + {"name": token["name"]}) + + def endTagOther(self, token): + return self.parser.phases["inTable"].processEndTag(token) + + class InRowPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-row + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("td", "th"), self.startTagTableCell), + (("caption", "col", "colgroup", "tbody", "tfoot", "thead", + "tr"), self.startTagTableOther) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("tr", self.endTagTr), + ("table", self.endTagTable), + (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), + (("body", "caption", "col", "colgroup", "html", "td", "th"), + self.endTagIgnore) + ]) + self.endTagHandler.default = self.endTagOther + + # helper methods (XXX unify this with other table helper methods) + def clearStackToTableRowContext(self): + while self.tree.openElements[-1].name not in ("tr", "html"): + self.parser.parseError("unexpected-implied-end-tag-in-table-row", + {"name": self.tree.openElements[-1].name}) + self.tree.openElements.pop() + + def ignoreEndTagTr(self): + return not self.tree.elementInScope("tr", variant="table") + + # the rest + def processEOF(self): + self.parser.phases["inTable"].processEOF() + + def processSpaceCharacters(self, token): + return self.parser.phases["inTable"].processSpaceCharacters(token) + + def processCharacters(self, token): + return self.parser.phases["inTable"].processCharacters(token) + + def startTagTableCell(self, token): + self.clearStackToTableRowContext() + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inCell"] + self.tree.activeFormattingElements.append(Marker) + + def startTagTableOther(self, token): + ignoreEndTag = self.ignoreEndTagTr() + self.endTagTr(impliedTagToken("tr")) + # XXX how are we sure it's always ignored in the innerHTML case? + if not ignoreEndTag: + return token + + def startTagOther(self, token): + return self.parser.phases["inTable"].processStartTag(token) + + def endTagTr(self, token): + if not self.ignoreEndTagTr(): + self.clearStackToTableRowContext() + self.tree.openElements.pop() + self.parser.phase = self.parser.phases["inTableBody"] + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def endTagTable(self, token): + ignoreEndTag = self.ignoreEndTagTr() + self.endTagTr(impliedTagToken("tr")) + # Reprocess the current tag if the tr end tag was not ignored + # XXX how are we sure it's always ignored in the innerHTML case? + if not ignoreEndTag: + return token + + def endTagTableRowGroup(self, token): + if self.tree.elementInScope(token["name"], variant="table"): + self.endTagTr(impliedTagToken("tr")) + return token + else: + self.parser.parseError() + + def endTagIgnore(self, token): + self.parser.parseError("unexpected-end-tag-in-table-row", + {"name": token["name"]}) + + def endTagOther(self, token): + return self.parser.phases["inTable"].processEndTag(token) + + class InCellPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-cell + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", + "thead", "tr"), self.startTagTableOther) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + (("td", "th"), self.endTagTableCell), + (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore), + (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply) + ]) + self.endTagHandler.default = self.endTagOther + + # helper + def closeCell(self): + if self.tree.elementInScope("td", variant="table"): + self.endTagTableCell(impliedTagToken("td")) + elif self.tree.elementInScope("th", variant="table"): + self.endTagTableCell(impliedTagToken("th")) + + # the rest + def processEOF(self): + self.parser.phases["inBody"].processEOF() + + def processCharacters(self, token): + return self.parser.phases["inBody"].processCharacters(token) + + def startTagTableOther(self, token): + if (self.tree.elementInScope("td", variant="table") or + self.tree.elementInScope("th", variant="table")): + self.closeCell() + return token + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def startTagOther(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def endTagTableCell(self, token): + if self.tree.elementInScope(token["name"], variant="table"): + self.tree.generateImpliedEndTags(token["name"]) + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("unexpected-cell-end-tag", + {"name": token["name"]}) + while True: + node = self.tree.openElements.pop() + if node.name == token["name"]: + break + else: + self.tree.openElements.pop() + self.tree.clearActiveFormattingElements() + self.parser.phase = self.parser.phases["inRow"] + else: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def endTagIgnore(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def endTagImply(self, token): + if self.tree.elementInScope(token["name"], variant="table"): + self.closeCell() + return token + else: + # sometimes innerHTML case + self.parser.parseError() + + def endTagOther(self, token): + return self.parser.phases["inBody"].processEndTag(token) + + class InSelectPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("option", self.startTagOption), + ("optgroup", self.startTagOptgroup), + ("select", self.startTagSelect), + (("input", "keygen", "textarea"), self.startTagInput), + ("script", self.startTagScript) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("option", self.endTagOption), + ("optgroup", self.endTagOptgroup), + ("select", self.endTagSelect) + ]) + self.endTagHandler.default = self.endTagOther + + # http://www.whatwg.org/specs/web-apps/current-work/#in-select + def processEOF(self): + if self.tree.openElements[-1].name != "html": + self.parser.parseError("eof-in-select") + else: + assert self.parser.innerHTML + + def processCharacters(self, token): + if token["data"] == "\u0000": + return + self.tree.insertText(token["data"]) + + def startTagOption(self, token): + # We need to imply </option> if <option> is the current node. + if self.tree.openElements[-1].name == "option": + self.tree.openElements.pop() + self.tree.insertElement(token) + + def startTagOptgroup(self, token): + if self.tree.openElements[-1].name == "option": + self.tree.openElements.pop() + if self.tree.openElements[-1].name == "optgroup": + self.tree.openElements.pop() + self.tree.insertElement(token) + + def startTagSelect(self, token): + self.parser.parseError("unexpected-select-in-select") + self.endTagSelect(impliedTagToken("select")) + + def startTagInput(self, token): + self.parser.parseError("unexpected-input-in-select") + if self.tree.elementInScope("select", variant="select"): + self.endTagSelect(impliedTagToken("select")) + return token + else: + assert self.parser.innerHTML + + def startTagScript(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagOther(self, token): + self.parser.parseError("unexpected-start-tag-in-select", + {"name": token["name"]}) + + def endTagOption(self, token): + if self.tree.openElements[-1].name == "option": + self.tree.openElements.pop() + else: + self.parser.parseError("unexpected-end-tag-in-select", + {"name": "option"}) + + def endTagOptgroup(self, token): + # </optgroup> implicitly closes <option> + if (self.tree.openElements[-1].name == "option" and + self.tree.openElements[-2].name == "optgroup"): + self.tree.openElements.pop() + # It also closes </optgroup> + if self.tree.openElements[-1].name == "optgroup": + self.tree.openElements.pop() + # But nothing else + else: + self.parser.parseError("unexpected-end-tag-in-select", + {"name": "optgroup"}) + + def endTagSelect(self, token): + if self.tree.elementInScope("select", variant="select"): + node = self.tree.openElements.pop() + while node.name != "select": + node = self.tree.openElements.pop() + self.parser.resetInsertionMode() + else: + # innerHTML case + assert self.parser.innerHTML + self.parser.parseError() + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag-in-select", + {"name": token["name"]}) + + class InSelectInTablePhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), + self.startTagTable) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), + self.endTagTable) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.parser.phases["inSelect"].processEOF() + + def processCharacters(self, token): + return self.parser.phases["inSelect"].processCharacters(token) + + def startTagTable(self, token): + self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]}) + self.endTagOther(impliedTagToken("select")) + return token + + def startTagOther(self, token): + return self.parser.phases["inSelect"].processStartTag(token) + + def endTagTable(self, token): + self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]}) + if self.tree.elementInScope(token["name"], variant="table"): + self.endTagOther(impliedTagToken("select")) + return token + + def endTagOther(self, token): + return self.parser.phases["inSelect"].processEndTag(token) + + class InForeignContentPhase(Phase): + breakoutElements = frozenset(["b", "big", "blockquote", "body", "br", + "center", "code", "dd", "div", "dl", "dt", + "em", "embed", "h1", "h2", "h3", + "h4", "h5", "h6", "head", "hr", "i", "img", + "li", "listing", "menu", "meta", "nobr", + "ol", "p", "pre", "ruby", "s", "small", + "span", "strong", "strike", "sub", "sup", + "table", "tt", "u", "ul", "var"]) + + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + def adjustSVGTagNames(self, token): + replacements = {"altglyph": "altGlyph", + "altglyphdef": "altGlyphDef", + "altglyphitem": "altGlyphItem", + "animatecolor": "animateColor", + "animatemotion": "animateMotion", + "animatetransform": "animateTransform", + "clippath": "clipPath", + "feblend": "feBlend", + "fecolormatrix": "feColorMatrix", + "fecomponenttransfer": "feComponentTransfer", + "fecomposite": "feComposite", + "feconvolvematrix": "feConvolveMatrix", + "fediffuselighting": "feDiffuseLighting", + "fedisplacementmap": "feDisplacementMap", + "fedistantlight": "feDistantLight", + "feflood": "feFlood", + "fefunca": "feFuncA", + "fefuncb": "feFuncB", + "fefuncg": "feFuncG", + "fefuncr": "feFuncR", + "fegaussianblur": "feGaussianBlur", + "feimage": "feImage", + "femerge": "feMerge", + "femergenode": "feMergeNode", + "femorphology": "feMorphology", + "feoffset": "feOffset", + "fepointlight": "fePointLight", + "fespecularlighting": "feSpecularLighting", + "fespotlight": "feSpotLight", + "fetile": "feTile", + "feturbulence": "feTurbulence", + "foreignobject": "foreignObject", + "glyphref": "glyphRef", + "lineargradient": "linearGradient", + "radialgradient": "radialGradient", + "textpath": "textPath"} + + if token["name"] in replacements: + token["name"] = replacements[token["name"]] + + def processCharacters(self, token): + if token["data"] == "\u0000": + token["data"] = "\uFFFD" + elif (self.parser.framesetOK and + any(char not in spaceCharacters for char in token["data"])): + self.parser.framesetOK = False + Phase.processCharacters(self, token) + + def processStartTag(self, token): + currentNode = self.tree.openElements[-1] + if (token["name"] in self.breakoutElements or + (token["name"] == "font" and + set(token["data"].keys()) & set(["color", "face", "size"]))): + self.parser.parseError("unexpected-html-element-in-foreign-content", + {"name": token["name"]}) + while (self.tree.openElements[-1].namespace != + self.tree.defaultNamespace and + not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and + not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])): + self.tree.openElements.pop() + return token + + else: + if currentNode.namespace == namespaces["mathml"]: + self.parser.adjustMathMLAttributes(token) + elif currentNode.namespace == namespaces["svg"]: + self.adjustSVGTagNames(token) + self.parser.adjustSVGAttributes(token) + self.parser.adjustForeignAttributes(token) + token["namespace"] = currentNode.namespace + self.tree.insertElement(token) + if token["selfClosing"]: + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def processEndTag(self, token): + nodeIndex = len(self.tree.openElements) - 1 + node = self.tree.openElements[-1] + if node.name.translate(asciiUpper2Lower) != token["name"]: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + while True: + if node.name.translate(asciiUpper2Lower) == token["name"]: + # XXX this isn't in the spec but it seems necessary + if self.parser.phase == self.parser.phases["inTableText"]: + self.parser.phase.flushCharacters() + self.parser.phase = self.parser.phase.originalPhase + while self.tree.openElements.pop() != node: + assert self.tree.openElements + new_token = None + break + nodeIndex -= 1 + + node = self.tree.openElements[nodeIndex] + if node.namespace != self.tree.defaultNamespace: + continue + else: + new_token = self.parser.phase.processEndTag(token) + break + return new_token + + class AfterBodyPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + # Stop parsing + pass + + def processComment(self, token): + # This is needed because data is to be appended to the <html> element + # here and not to whatever is currently open. + self.tree.insertComment(token, self.tree.openElements[0]) + + def processCharacters(self, token): + self.parser.parseError("unexpected-char-after-body") + self.parser.phase = self.parser.phases["inBody"] + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagOther(self, token): + self.parser.parseError("unexpected-start-tag-after-body", + {"name": token["name"]}) + self.parser.phase = self.parser.phases["inBody"] + return token + + def endTagHtml(self, name): + if self.parser.innerHTML: + self.parser.parseError("unexpected-end-tag-after-body-innerhtml") + else: + self.parser.phase = self.parser.phases["afterAfterBody"] + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag-after-body", + {"name": token["name"]}) + self.parser.phase = self.parser.phases["inBody"] + return token + + class InFramesetPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("frameset", self.startTagFrameset), + ("frame", self.startTagFrame), + ("noframes", self.startTagNoframes) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("frameset", self.endTagFrameset) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + if self.tree.openElements[-1].name != "html": + self.parser.parseError("eof-in-frameset") + else: + assert self.parser.innerHTML + + def processCharacters(self, token): + self.parser.parseError("unexpected-char-in-frameset") + + def startTagFrameset(self, token): + self.tree.insertElement(token) + + def startTagFrame(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + + def startTagNoframes(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagOther(self, token): + self.parser.parseError("unexpected-start-tag-in-frameset", + {"name": token["name"]}) + + def endTagFrameset(self, token): + if self.tree.openElements[-1].name == "html": + # innerHTML case + self.parser.parseError("unexpected-frameset-in-frameset-innerhtml") + else: + self.tree.openElements.pop() + if (not self.parser.innerHTML and + self.tree.openElements[-1].name != "frameset"): + # If we're not in innerHTML mode and the current node is not a + # "frameset" element (anymore) then switch. + self.parser.phase = self.parser.phases["afterFrameset"] + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag-in-frameset", + {"name": token["name"]}) + + class AfterFramesetPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#after3 + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("noframes", self.startTagNoframes) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = _utils.MethodDispatcher([ + ("html", self.endTagHtml) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + # Stop parsing + pass + + def processCharacters(self, token): + self.parser.parseError("unexpected-char-after-frameset") + + def startTagNoframes(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagOther(self, token): + self.parser.parseError("unexpected-start-tag-after-frameset", + {"name": token["name"]}) + + def endTagHtml(self, token): + self.parser.phase = self.parser.phases["afterAfterFrameset"] + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag-after-frameset", + {"name": token["name"]}) + + class AfterAfterBodyPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml) + ]) + self.startTagHandler.default = self.startTagOther + + def processEOF(self): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + return self.parser.phases["inBody"].processSpaceCharacters(token) + + def processCharacters(self, token): + self.parser.parseError("expected-eof-but-got-char") + self.parser.phase = self.parser.phases["inBody"] + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagOther(self, token): + self.parser.parseError("expected-eof-but-got-start-tag", + {"name": token["name"]}) + self.parser.phase = self.parser.phases["inBody"] + return token + + def processEndTag(self, token): + self.parser.parseError("expected-eof-but-got-end-tag", + {"name": token["name"]}) + self.parser.phase = self.parser.phases["inBody"] + return token + + class AfterAfterFramesetPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = _utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("noframes", self.startTagNoFrames) + ]) + self.startTagHandler.default = self.startTagOther + + def processEOF(self): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + return self.parser.phases["inBody"].processSpaceCharacters(token) + + def processCharacters(self, token): + self.parser.parseError("expected-eof-but-got-char") + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagNoFrames(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagOther(self, token): + self.parser.parseError("expected-eof-but-got-start-tag", + {"name": token["name"]}) + + def processEndTag(self, token): + self.parser.parseError("expected-eof-but-got-end-tag", + {"name": token["name"]}) + # pylint:enable=unused-argument + + return { + "initial": InitialPhase, + "beforeHtml": BeforeHtmlPhase, + "beforeHead": BeforeHeadPhase, + "inHead": InHeadPhase, + "inHeadNoscript": InHeadNoscriptPhase, + "afterHead": AfterHeadPhase, + "inBody": InBodyPhase, + "text": TextPhase, + "inTable": InTablePhase, + "inTableText": InTableTextPhase, + "inCaption": InCaptionPhase, + "inColumnGroup": InColumnGroupPhase, + "inTableBody": InTableBodyPhase, + "inRow": InRowPhase, + "inCell": InCellPhase, + "inSelect": InSelectPhase, + "inSelectInTable": InSelectInTablePhase, + "inForeignContent": InForeignContentPhase, + "afterBody": AfterBodyPhase, + "inFrameset": InFramesetPhase, + "afterFrameset": AfterFramesetPhase, + "afterAfterBody": AfterAfterBodyPhase, + "afterAfterFrameset": AfterAfterFramesetPhase, + # XXX after after frameset + } + + +def adjust_attributes(token, replacements): + needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) + if needs_adjustment: + token['data'] = OrderedDict((replacements.get(k, k), v) + for k, v in token['data'].items()) + + +def impliedTagToken(name, type="EndTag", attributes=None, + selfClosing=False): + if attributes is None: + attributes = {} + return {"type": tokenTypes[type], "name": name, "data": attributes, + "selfClosing": selfClosing} + + +class ParseError(Exception): + """Error in parsed document""" + pass diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/serializer.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/serializer.py new file mode 100644 index 00000000..53f4d44c --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/serializer.py @@ -0,0 +1,409 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +import re + +from codecs import register_error, xmlcharrefreplace_errors + +from .constants import voidElements, booleanAttributes, spaceCharacters +from .constants import rcdataElements, entities, xmlEntities +from . import treewalkers, _utils +from xml.sax.saxutils import escape + +_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`" +_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]") +_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars + + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" + "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" + "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" + "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" + "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" + "\u3000]") + + +_encode_entity_map = {} +_is_ucs4 = len("\U0010FFFF") == 1 +for k, v in list(entities.items()): + # skip multi-character entities + if ((_is_ucs4 and len(v) > 1) or + (not _is_ucs4 and len(v) > 2)): + continue + if v != "&": + if len(v) == 2: + v = _utils.surrogatePairToCodepoint(v) + else: + v = ord(v) + if v not in _encode_entity_map or k.islower(): + # prefer < over < and similarly for &, >, etc. + _encode_entity_map[v] = k + + +def htmlentityreplace_errors(exc): + if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): + res = [] + codepoints = [] + skip = False + for i, c in enumerate(exc.object[exc.start:exc.end]): + if skip: + skip = False + continue + index = i + exc.start + if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): + codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2]) + skip = True + else: + codepoint = ord(c) + codepoints.append(codepoint) + for cp in codepoints: + e = _encode_entity_map.get(cp) + if e: + res.append("&") + res.append(e) + if not e.endswith(";"): + res.append(";") + else: + res.append("&#x%s;" % (hex(cp)[2:])) + return ("".join(res), exc.end) + else: + return xmlcharrefreplace_errors(exc) + + +register_error("htmlentityreplace", htmlentityreplace_errors) + + +def serialize(input, tree="etree", encoding=None, **serializer_opts): + """Serializes the input token stream using the specified treewalker + + :arg input: the token stream to serialize + + :arg tree: the treewalker to use + + :arg encoding: the encoding to use + + :arg serializer_opts: any options to pass to the + :py:class:`html5lib.serializer.HTMLSerializer` that gets created + + :returns: the tree serialized as a string + + Example: + + >>> from html5lib.html5parser import parse + >>> from html5lib.serializer import serialize + >>> token_stream = parse('<html><body><p>Hi!</p></body></html>') + >>> serialize(token_stream, omit_optional_tags=False) + '<html><head></head><body><p>Hi!</p></body></html>' + + """ + # XXX: Should we cache this? + walker = treewalkers.getTreeWalker(tree) + s = HTMLSerializer(**serializer_opts) + return s.render(walker(input), encoding) + + +class HTMLSerializer(object): + + # attribute quoting options + quote_attr_values = "legacy" # be secure by default + quote_char = '"' + use_best_quote_char = True + + # tag syntax options + omit_optional_tags = True + minimize_boolean_attributes = True + use_trailing_solidus = False + space_before_trailing_solidus = True + + # escaping options + escape_lt_in_attrs = False + escape_rcdata = False + resolve_entities = True + + # miscellaneous options + alphabetical_attributes = False + inject_meta_charset = True + strip_whitespace = False + sanitize = False + + options = ("quote_attr_values", "quote_char", "use_best_quote_char", + "omit_optional_tags", "minimize_boolean_attributes", + "use_trailing_solidus", "space_before_trailing_solidus", + "escape_lt_in_attrs", "escape_rcdata", "resolve_entities", + "alphabetical_attributes", "inject_meta_charset", + "strip_whitespace", "sanitize") + + def __init__(self, **kwargs): + """Initialize HTMLSerializer + + :arg inject_meta_charset: Whether or not to inject the meta charset. + + Defaults to ``True``. + + :arg quote_attr_values: Whether to quote attribute values that don't + require quoting per legacy browser behavior (``"legacy"``), when + required by the standard (``"spec"``), or always (``"always"``). + + Defaults to ``"legacy"``. + + :arg quote_char: Use given quote character for attribute quoting. + + Defaults to ``"`` which will use double quotes unless attribute + value contains a double quote, in which case single quotes are + used. + + :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute + values. + + Defaults to ``False``. + + :arg escape_rcdata: Whether to escape characters that need to be + escaped within normal elements within rcdata elements such as + style. + + Defaults to ``False``. + + :arg resolve_entities: Whether to resolve named character entities that + appear in the source tree. The XML predefined entities < > + & " ' are unaffected by this setting. + + Defaults to ``True``. + + :arg strip_whitespace: Whether to remove semantically meaningless + whitespace. (This compresses all whitespace to a single space + except within ``pre``.) + + Defaults to ``False``. + + :arg minimize_boolean_attributes: Shortens boolean attributes to give + just the attribute value, for example:: + + <input disabled="disabled"> + + becomes:: + + <input disabled> + + Defaults to ``True``. + + :arg use_trailing_solidus: Includes a close-tag slash at the end of the + start tag of void elements (empty elements whose end tag is + forbidden). E.g. ``<hr/>``. + + Defaults to ``False``. + + :arg space_before_trailing_solidus: Places a space immediately before + the closing slash in a tag using a trailing solidus. E.g. + ``<hr />``. Requires ``use_trailing_solidus=True``. + + Defaults to ``True``. + + :arg sanitize: Strip all unsafe or unknown constructs from output. + See :py:class:`html5lib.filters.sanitizer.Filter`. + + Defaults to ``False``. + + :arg omit_optional_tags: Omit start/end tags that are optional. + + Defaults to ``True``. + + :arg alphabetical_attributes: Reorder attributes to be in alphabetical order. + + Defaults to ``False``. + + """ + unexpected_args = frozenset(kwargs) - frozenset(self.options) + if len(unexpected_args) > 0: + raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args))) + if 'quote_char' in kwargs: + self.use_best_quote_char = False + for attr in self.options: + setattr(self, attr, kwargs.get(attr, getattr(self, attr))) + self.errors = [] + self.strict = False + + def encode(self, string): + assert(isinstance(string, text_type)) + if self.encoding: + return string.encode(self.encoding, "htmlentityreplace") + else: + return string + + def encodeStrict(self, string): + assert(isinstance(string, text_type)) + if self.encoding: + return string.encode(self.encoding, "strict") + else: + return string + + def serialize(self, treewalker, encoding=None): + # pylint:disable=too-many-nested-blocks + self.encoding = encoding + in_cdata = False + self.errors = [] + + if encoding and self.inject_meta_charset: + from .filters.inject_meta_charset import Filter + treewalker = Filter(treewalker, encoding) + # Alphabetical attributes is here under the assumption that none of + # the later filters add or change order of attributes; it needs to be + # before the sanitizer so escaped elements come out correctly + if self.alphabetical_attributes: + from .filters.alphabeticalattributes import Filter + treewalker = Filter(treewalker) + # WhitespaceFilter should be used before OptionalTagFilter + # for maximum efficiently of this latter filter + if self.strip_whitespace: + from .filters.whitespace import Filter + treewalker = Filter(treewalker) + if self.sanitize: + from .filters.sanitizer import Filter + treewalker = Filter(treewalker) + if self.omit_optional_tags: + from .filters.optionaltags import Filter + treewalker = Filter(treewalker) + + for token in treewalker: + type = token["type"] + if type == "Doctype": + doctype = "<!DOCTYPE %s" % token["name"] + + if token["publicId"]: + doctype += ' PUBLIC "%s"' % token["publicId"] + elif token["systemId"]: + doctype += " SYSTEM" + if token["systemId"]: + if token["systemId"].find('"') >= 0: + if token["systemId"].find("'") >= 0: + self.serializeError("System identifer contains both single and double quote characters") + quote_char = "'" + else: + quote_char = '"' + doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) + + doctype += ">" + yield self.encodeStrict(doctype) + + elif type in ("Characters", "SpaceCharacters"): + if type == "SpaceCharacters" or in_cdata: + if in_cdata and token["data"].find("</") >= 0: + self.serializeError("Unexpected </ in CDATA") + yield self.encode(token["data"]) + else: + yield self.encode(escape(token["data"])) + + elif type in ("StartTag", "EmptyTag"): + name = token["name"] + yield self.encodeStrict("<%s" % name) + if name in rcdataElements and not self.escape_rcdata: + in_cdata = True + elif in_cdata: + self.serializeError("Unexpected child element of a CDATA element") + for (_, attr_name), attr_value in token["data"].items(): + # TODO: Add namespace support here + k = attr_name + v = attr_value + yield self.encodeStrict(' ') + + yield self.encodeStrict(k) + if not self.minimize_boolean_attributes or \ + (k not in booleanAttributes.get(name, tuple()) and + k not in booleanAttributes.get("", tuple())): + yield self.encodeStrict("=") + if self.quote_attr_values == "always" or len(v) == 0: + quote_attr = True + elif self.quote_attr_values == "spec": + quote_attr = _quoteAttributeSpec.search(v) is not None + elif self.quote_attr_values == "legacy": + quote_attr = _quoteAttributeLegacy.search(v) is not None + else: + raise ValueError("quote_attr_values must be one of: " + "'always', 'spec', or 'legacy'") + v = v.replace("&", "&") + if self.escape_lt_in_attrs: + v = v.replace("<", "<") + if quote_attr: + quote_char = self.quote_char + if self.use_best_quote_char: + if "'" in v and '"' not in v: + quote_char = '"' + elif '"' in v and "'" not in v: + quote_char = "'" + if quote_char == "'": + v = v.replace("'", "'") + else: + v = v.replace('"', """) + yield self.encodeStrict(quote_char) + yield self.encode(v) + yield self.encodeStrict(quote_char) + else: + yield self.encode(v) + if name in voidElements and self.use_trailing_solidus: + if self.space_before_trailing_solidus: + yield self.encodeStrict(" /") + else: + yield self.encodeStrict("/") + yield self.encode(">") + + elif type == "EndTag": + name = token["name"] + if name in rcdataElements: + in_cdata = False + elif in_cdata: + self.serializeError("Unexpected child element of a CDATA element") + yield self.encodeStrict("</%s>" % name) + + elif type == "Comment": + data = token["data"] + if data.find("--") >= 0: + self.serializeError("Comment contains --") + yield self.encodeStrict("<!--%s-->" % token["data"]) + + elif type == "Entity": + name = token["name"] + key = name + ";" + if key not in entities: + self.serializeError("Entity %s not recognized" % name) + if self.resolve_entities and key not in xmlEntities: + data = entities[key] + else: + data = "&%s;" % name + yield self.encodeStrict(data) + + else: + self.serializeError(token["data"]) + + def render(self, treewalker, encoding=None): + """Serializes the stream from the treewalker into a string + + :arg treewalker: the treewalker to serialize + + :arg encoding: the string encoding to use + + :returns: the serialized tree + + Example: + + >>> from html5lib import parse, getTreeWalker + >>> from html5lib.serializer import HTMLSerializer + >>> token_stream = parse('<html><body>Hi!</body></html>') + >>> walker = getTreeWalker('etree') + >>> serializer = HTMLSerializer(omit_optional_tags=False) + >>> serializer.render(walker(token_stream)) + '<html><head></head><body>Hi!</body></html>' + + """ + if encoding: + return b"".join(list(self.serialize(treewalker, encoding))) + else: + return "".join(list(self.serialize(treewalker))) + + def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): + # XXX The idea is to make data mandatory. + self.errors.append(data) + if self.strict: + raise SerializeError + + +class SerializeError(Exception): + """Error in serialized tree""" + pass diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__init__.py new file mode 100644 index 00000000..7ef59590 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__init__.py @@ -0,0 +1,30 @@ +"""Tree adapters let you convert from one tree structure to another + +Example: + +.. code-block:: python + + from pip._vendor import html5lib + from pip._vendor.html5lib.treeadapters import genshi + + doc = '<html><body>Hi!</body></html>' + treebuilder = html5lib.getTreeBuilder('etree') + parser = html5lib.HTMLParser(tree=treebuilder) + tree = parser.parse(doc) + TreeWalker = html5lib.getTreeWalker('etree') + + genshi_tree = genshi.to_genshi(TreeWalker(tree)) + +""" +from __future__ import absolute_import, division, unicode_literals + +from . import sax + +__all__ = ["sax"] + +try: + from . import genshi # noqa +except ImportError: + pass +else: + __all__.append("genshi") diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..6d4ae732 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..30c93203 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/genshi.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..c3567384 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/__pycache__/sax.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/genshi.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/genshi.py new file mode 100644 index 00000000..61d5fb6a --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/genshi.py @@ -0,0 +1,54 @@ +from __future__ import absolute_import, division, unicode_literals + +from genshi.core import QName, Attrs +from genshi.core import START, END, TEXT, COMMENT, DOCTYPE + + +def to_genshi(walker): + """Convert a tree to a genshi tree + + :arg walker: the treewalker to use to walk the tree to convert it + + :returns: generator of genshi nodes + + """ + text = [] + for token in walker: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + text.append(token["data"]) + elif text: + yield TEXT, "".join(text), (None, -1, -1) + text = [] + + if type in ("StartTag", "EmptyTag"): + if token["namespace"]: + name = "{%s}%s" % (token["namespace"], token["name"]) + else: + name = token["name"] + attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) + for attr, value in token["data"].items()]) + yield (START, (QName(name), attrs), (None, -1, -1)) + if type == "EmptyTag": + type = "EndTag" + + if type == "EndTag": + if token["namespace"]: + name = "{%s}%s" % (token["namespace"], token["name"]) + else: + name = token["name"] + + yield END, QName(name), (None, -1, -1) + + elif type == "Comment": + yield COMMENT, token["data"], (None, -1, -1) + + elif type == "Doctype": + yield DOCTYPE, (token["name"], token["publicId"], + token["systemId"]), (None, -1, -1) + + else: + pass # FIXME: What to do? + + if text: + yield TEXT, "".join(text), (None, -1, -1) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/sax.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/sax.py new file mode 100644 index 00000000..f4ccea5a --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treeadapters/sax.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import, division, unicode_literals + +from xml.sax.xmlreader import AttributesNSImpl + +from ..constants import adjustForeignAttributes, unadjustForeignAttributes + +prefix_mapping = {} +for prefix, localName, namespace in adjustForeignAttributes.values(): + if prefix is not None: + prefix_mapping[prefix] = namespace + + +def to_sax(walker, handler): + """Call SAX-like content handler based on treewalker walker + + :arg walker: the treewalker to use to walk the tree to convert it + + :arg handler: SAX handler to use + + """ + handler.startDocument() + for prefix, namespace in prefix_mapping.items(): + handler.startPrefixMapping(prefix, namespace) + + for token in walker: + type = token["type"] + if type == "Doctype": + continue + elif type in ("StartTag", "EmptyTag"): + attrs = AttributesNSImpl(token["data"], + unadjustForeignAttributes) + handler.startElementNS((token["namespace"], token["name"]), + token["name"], + attrs) + if type == "EmptyTag": + handler.endElementNS((token["namespace"], token["name"]), + token["name"]) + elif type == "EndTag": + handler.endElementNS((token["namespace"], token["name"]), + token["name"]) + elif type in ("Characters", "SpaceCharacters"): + handler.characters(token["data"]) + elif type == "Comment": + pass + else: + assert False, "Unknown token type" + + for prefix, namespace in prefix_mapping.items(): + handler.endPrefixMapping(prefix) + handler.endDocument() diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py new file mode 100644 index 00000000..d44447ea --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__init__.py @@ -0,0 +1,88 @@ +"""A collection of modules for building different kinds of trees from HTML +documents. + +To create a treebuilder for a new type of tree, you need to do +implement several things: + +1. A set of classes for various types of elements: Document, Doctype, Comment, + Element. These must implement the interface of ``base.treebuilders.Node`` + (although comment nodes have a different signature for their constructor, + see ``treebuilders.etree.Comment``) Textual content may also be implemented + as another node type, or not, as your tree implementation requires. + +2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits + from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: + + * ``documentClass`` - the class to use for the bottommost node of a document + * ``elementClass`` - the class to use for HTML Elements + * ``commentClass`` - the class to use for comments + * ``doctypeClass`` - the class to use for doctypes + + It also has one required method: + + * ``getDocument`` - Returns the root node of the complete document tree + +3. If you wish to run the unit tests, you must also create a ``testSerializer`` + method on your treebuilder which accepts a node and returns a string + containing Node and its children serialized according to the format used in + the unittests + +""" + +from __future__ import absolute_import, division, unicode_literals + +from .._utils import default_etree + +treeBuilderCache = {} + + +def getTreeBuilder(treeType, implementation=None, **kwargs): + """Get a TreeBuilder class for various types of trees with built-in support + + :arg treeType: the name of the tree type required (case-insensitive). Supported + values are: + + * "dom" - A generic builder for DOM implementations, defaulting to a + xml.dom.minidom based implementation. + * "etree" - A generic builder for tree implementations exposing an + ElementTree-like interface, defaulting to xml.etree.cElementTree if + available and xml.etree.ElementTree if not. + * "lxml" - A etree-based builder for lxml.etree, handling limitations + of lxml's implementation. + + :arg implementation: (Currently applies to the "etree" and "dom" tree + types). A module implementing the tree type e.g. xml.etree.ElementTree + or xml.etree.cElementTree. + + :arg kwargs: Any additional options to pass to the TreeBuilder when + creating it. + + Example: + + >>> from html5lib.treebuilders import getTreeBuilder + >>> builder = getTreeBuilder('etree') + + """ + + treeType = treeType.lower() + if treeType not in treeBuilderCache: + if treeType == "dom": + from . import dom + # Come up with a sane default (pref. from the stdlib) + if implementation is None: + from xml.dom import minidom + implementation = minidom + # NEVER cache here, caching is done in the dom submodule + return dom.getDomModule(implementation, **kwargs).TreeBuilder + elif treeType == "lxml": + from . import etree_lxml + treeBuilderCache[treeType] = etree_lxml.TreeBuilder + elif treeType == "etree": + from . import etree + if implementation is None: + implementation = default_etree + # NEVER cache here, caching is done in the etree submodule + return etree.getETreeModule(implementation, **kwargs).TreeBuilder + else: + raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) + return treeBuilderCache.get(treeType) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..4160076a --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..f3290210 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/base.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..9e461bea --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/dom.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..bd7bceb3 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/etree.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..8aa7773e --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/__pycache__/etree_lxml.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/base.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/base.py new file mode 100644 index 00000000..73973db5 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/base.py @@ -0,0 +1,417 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +from ..constants import scopingElements, tableInsertModeElements, namespaces + +# The scope markers are inserted when entering object elements, +# marquees, table cells, and table captions, and are used to prevent formatting +# from "leaking" into tables, object elements, and marquees. +Marker = None + +listElementsMap = { + None: (frozenset(scopingElements), False), + "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), + "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), + (namespaces["html"], "ul")])), False), + "table": (frozenset([(namespaces["html"], "html"), + (namespaces["html"], "table")]), False), + "select": (frozenset([(namespaces["html"], "optgroup"), + (namespaces["html"], "option")]), True) +} + + +class Node(object): + """Represents an item in the tree""" + def __init__(self, name): + """Creates a Node + + :arg name: The tag name associated with the node + + """ + # The tag name assocaited with the node + self.name = name + # The parent of the current node (or None for the document node) + self.parent = None + # The value of the current node (applies to text nodes and comments) + self.value = None + # A dict holding name -> value pairs for attributes of the node + self.attributes = {} + # A list of child nodes of the current node. This must include all + # elements but not necessarily other node types. + self.childNodes = [] + # A list of miscellaneous flags that can be set on the node. + self._flags = [] + + def __str__(self): + attributesStr = " ".join(["%s=\"%s\"" % (name, value) + for name, value in + self.attributes.items()]) + if attributesStr: + return "<%s %s>" % (self.name, attributesStr) + else: + return "<%s>" % (self.name) + + def __repr__(self): + return "<%s>" % (self.name) + + def appendChild(self, node): + """Insert node as a child of the current node + + :arg node: the node to insert + + """ + raise NotImplementedError + + def insertText(self, data, insertBefore=None): + """Insert data as text in the current node, positioned before the + start of node insertBefore or to the end of the node's text. + + :arg data: the data to insert + + :arg insertBefore: True if you want to insert the text before the node + and False if you want to insert it after the node + + """ + raise NotImplementedError + + def insertBefore(self, node, refNode): + """Insert node as a child of the current node, before refNode in the + list of child nodes. Raises ValueError if refNode is not a child of + the current node + + :arg node: the node to insert + + :arg refNode: the child node to insert the node before + + """ + raise NotImplementedError + + def removeChild(self, node): + """Remove node from the children of the current node + + :arg node: the child node to remove + + """ + raise NotImplementedError + + def reparentChildren(self, newParent): + """Move all the children of the current node to newParent. + This is needed so that trees that don't store text as nodes move the + text in the correct way + + :arg newParent: the node to move all this node's children to + + """ + # XXX - should this method be made more general? + for child in self.childNodes: + newParent.appendChild(child) + self.childNodes = [] + + def cloneNode(self): + """Return a shallow copy of the current node i.e. a node with the same + name and attributes but with no parent or child nodes + """ + raise NotImplementedError + + def hasContent(self): + """Return true if the node has children or text, false otherwise + """ + raise NotImplementedError + + +class ActiveFormattingElements(list): + def append(self, node): + equalCount = 0 + if node != Marker: + for element in self[::-1]: + if element == Marker: + break + if self.nodesEqual(element, node): + equalCount += 1 + if equalCount == 3: + self.remove(element) + break + list.append(self, node) + + def nodesEqual(self, node1, node2): + if not node1.nameTuple == node2.nameTuple: + return False + + if not node1.attributes == node2.attributes: + return False + + return True + + +class TreeBuilder(object): + """Base treebuilder implementation + + * documentClass - the class to use for the bottommost node of a document + * elementClass - the class to use for HTML Elements + * commentClass - the class to use for comments + * doctypeClass - the class to use for doctypes + + """ + # pylint:disable=not-callable + + # Document class + documentClass = None + + # The class to use for creating a node + elementClass = None + + # The class to use for creating comments + commentClass = None + + # The class to use for creating doctypes + doctypeClass = None + + # Fragment class + fragmentClass = None + + def __init__(self, namespaceHTMLElements): + """Create a TreeBuilder + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + """ + if namespaceHTMLElements: + self.defaultNamespace = "http://www.w3.org/1999/xhtml" + else: + self.defaultNamespace = None + self.reset() + + def reset(self): + self.openElements = [] + self.activeFormattingElements = ActiveFormattingElements() + + # XXX - rename these to headElement, formElement + self.headPointer = None + self.formPointer = None + + self.insertFromTable = False + + self.document = self.documentClass() + + def elementInScope(self, target, variant=None): + + # If we pass a node in we match that. if we pass a string + # match any node with that name + exactNode = hasattr(target, "nameTuple") + if not exactNode: + if isinstance(target, text_type): + target = (namespaces["html"], target) + assert isinstance(target, tuple) + + listElements, invert = listElementsMap[variant] + + for node in reversed(self.openElements): + if exactNode and node == target: + return True + elif not exactNode and node.nameTuple == target: + return True + elif (invert ^ (node.nameTuple in listElements)): + return False + + assert False # We should never reach this point + + def reconstructActiveFormattingElements(self): + # Within this algorithm the order of steps described in the + # specification is not quite the same as the order of steps in the + # code. It should still do the same though. + + # Step 1: stop the algorithm when there's nothing to do. + if not self.activeFormattingElements: + return + + # Step 2 and step 3: we start with the last element. So i is -1. + i = len(self.activeFormattingElements) - 1 + entry = self.activeFormattingElements[i] + if entry == Marker or entry in self.openElements: + return + + # Step 6 + while entry != Marker and entry not in self.openElements: + if i == 0: + # This will be reset to 0 below + i = -1 + break + i -= 1 + # Step 5: let entry be one earlier in the list. + entry = self.activeFormattingElements[i] + + while True: + # Step 7 + i += 1 + + # Step 8 + entry = self.activeFormattingElements[i] + clone = entry.cloneNode() # Mainly to get a new copy of the attributes + + # Step 9 + element = self.insertElement({"type": "StartTag", + "name": clone.name, + "namespace": clone.namespace, + "data": clone.attributes}) + + # Step 10 + self.activeFormattingElements[i] = element + + # Step 11 + if element == self.activeFormattingElements[-1]: + break + + def clearActiveFormattingElements(self): + entry = self.activeFormattingElements.pop() + while self.activeFormattingElements and entry != Marker: + entry = self.activeFormattingElements.pop() + + def elementInActiveFormattingElements(self, name): + """Check if an element exists between the end of the active + formatting elements and the last marker. If it does, return it, else + return false""" + + for item in self.activeFormattingElements[::-1]: + # Check for Marker first because if it's a Marker it doesn't have a + # name attribute. + if item == Marker: + break + elif item.name == name: + return item + return False + + def insertRoot(self, token): + element = self.createElement(token) + self.openElements.append(element) + self.document.appendChild(element) + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + doctype = self.doctypeClass(name, publicId, systemId) + self.document.appendChild(doctype) + + def insertComment(self, token, parent=None): + if parent is None: + parent = self.openElements[-1] + parent.appendChild(self.commentClass(token["data"])) + + def createElement(self, token): + """Create an element but don't insert it anywhere""" + name = token["name"] + namespace = token.get("namespace", self.defaultNamespace) + element = self.elementClass(name, namespace) + element.attributes = token["data"] + return element + + def _getInsertFromTable(self): + return self._insertFromTable + + def _setInsertFromTable(self, value): + """Switch the function used to insert an element from the + normal one to the misnested table one and back again""" + self._insertFromTable = value + if value: + self.insertElement = self.insertElementTable + else: + self.insertElement = self.insertElementNormal + + insertFromTable = property(_getInsertFromTable, _setInsertFromTable) + + def insertElementNormal(self, token): + name = token["name"] + assert isinstance(name, text_type), "Element %s not unicode" % name + namespace = token.get("namespace", self.defaultNamespace) + element = self.elementClass(name, namespace) + element.attributes = token["data"] + self.openElements[-1].appendChild(element) + self.openElements.append(element) + return element + + def insertElementTable(self, token): + """Create an element and insert it into the tree""" + element = self.createElement(token) + if self.openElements[-1].name not in tableInsertModeElements: + return self.insertElementNormal(token) + else: + # We should be in the InTable mode. This means we want to do + # special magic element rearranging + parent, insertBefore = self.getTableMisnestedNodePosition() + if insertBefore is None: + parent.appendChild(element) + else: + parent.insertBefore(element, insertBefore) + self.openElements.append(element) + return element + + def insertText(self, data, parent=None): + """Insert text data.""" + if parent is None: + parent = self.openElements[-1] + + if (not self.insertFromTable or (self.insertFromTable and + self.openElements[-1].name + not in tableInsertModeElements)): + parent.insertText(data) + else: + # We should be in the InTable mode. This means we want to do + # special magic element rearranging + parent, insertBefore = self.getTableMisnestedNodePosition() + parent.insertText(data, insertBefore) + + def getTableMisnestedNodePosition(self): + """Get the foster parent element, and sibling to insert before + (or None) when inserting a misnested table node""" + # The foster parent element is the one which comes before the most + # recently opened table element + # XXX - this is really inelegant + lastTable = None + fosterParent = None + insertBefore = None + for elm in self.openElements[::-1]: + if elm.name == "table": + lastTable = elm + break + if lastTable: + # XXX - we should really check that this parent is actually a + # node here + if lastTable.parent: + fosterParent = lastTable.parent + insertBefore = lastTable + else: + fosterParent = self.openElements[ + self.openElements.index(lastTable) - 1] + else: + fosterParent = self.openElements[0] + return fosterParent, insertBefore + + def generateImpliedEndTags(self, exclude=None): + name = self.openElements[-1].name + # XXX td, th and tr are not actually needed + if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and + name != exclude): + self.openElements.pop() + # XXX This is not entirely what the specification says. We should + # investigate it more closely. + self.generateImpliedEndTags(exclude) + + def getDocument(self): + """Return the final tree""" + return self.document + + def getFragment(self): + """Return the final fragment""" + # assert self.innerHTML + fragment = self.fragmentClass() + self.openElements[0].reparentChildren(fragment) + return fragment + + def testSerializer(self, node): + """Serialize the subtree of node in the format required by unit tests + + :arg node: the node from which to start serializing + + """ + raise NotImplementedError diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/dom.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/dom.py new file mode 100644 index 00000000..d8b53004 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/dom.py @@ -0,0 +1,239 @@ +from __future__ import absolute_import, division, unicode_literals + + +try: + from collections.abc import MutableMapping +except ImportError: # Python 2.7 + from collections import MutableMapping +from xml.dom import minidom, Node +import weakref + +from . import base +from .. import constants +from ..constants import namespaces +from .._utils import moduleFactoryFactory + + +def getDomBuilder(DomImplementation): + Dom = DomImplementation + + class AttrList(MutableMapping): + def __init__(self, element): + self.element = element + + def __iter__(self): + return iter(self.element.attributes.keys()) + + def __setitem__(self, name, value): + if isinstance(name, tuple): + raise NotImplementedError + else: + attr = self.element.ownerDocument.createAttribute(name) + attr.value = value + self.element.attributes[name] = attr + + def __len__(self): + return len(self.element.attributes) + + def items(self): + return list(self.element.attributes.items()) + + def values(self): + return list(self.element.attributes.values()) + + def __getitem__(self, name): + if isinstance(name, tuple): + raise NotImplementedError + else: + return self.element.attributes[name].value + + def __delitem__(self, name): + if isinstance(name, tuple): + raise NotImplementedError + else: + del self.element.attributes[name] + + class NodeBuilder(base.Node): + def __init__(self, element): + base.Node.__init__(self, element.nodeName) + self.element = element + + namespace = property(lambda self: hasattr(self.element, "namespaceURI") and + self.element.namespaceURI or None) + + def appendChild(self, node): + node.parent = self + self.element.appendChild(node.element) + + def insertText(self, data, insertBefore=None): + text = self.element.ownerDocument.createTextNode(data) + if insertBefore: + self.element.insertBefore(text, insertBefore.element) + else: + self.element.appendChild(text) + + def insertBefore(self, node, refNode): + self.element.insertBefore(node.element, refNode.element) + node.parent = self + + def removeChild(self, node): + if node.element.parentNode == self.element: + self.element.removeChild(node.element) + node.parent = None + + def reparentChildren(self, newParent): + while self.element.hasChildNodes(): + child = self.element.firstChild + self.element.removeChild(child) + newParent.element.appendChild(child) + self.childNodes = [] + + def getAttributes(self): + return AttrList(self.element) + + def setAttributes(self, attributes): + if attributes: + for name, value in list(attributes.items()): + if isinstance(name, tuple): + if name[0] is not None: + qualifiedName = (name[0] + ":" + name[1]) + else: + qualifiedName = name[1] + self.element.setAttributeNS(name[2], qualifiedName, + value) + else: + self.element.setAttribute( + name, value) + attributes = property(getAttributes, setAttributes) + + def cloneNode(self): + return NodeBuilder(self.element.cloneNode(False)) + + def hasContent(self): + return self.element.hasChildNodes() + + def getNameTuple(self): + if self.namespace is None: + return namespaces["html"], self.name + else: + return self.namespace, self.name + + nameTuple = property(getNameTuple) + + class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable + def documentClass(self): + self.dom = Dom.getDOMImplementation().createDocument(None, None, None) + return weakref.proxy(self) + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + domimpl = Dom.getDOMImplementation() + doctype = domimpl.createDocumentType(name, publicId, systemId) + self.document.appendChild(NodeBuilder(doctype)) + if Dom == minidom: + doctype.ownerDocument = self.dom + + def elementClass(self, name, namespace=None): + if namespace is None and self.defaultNamespace is None: + node = self.dom.createElement(name) + else: + node = self.dom.createElementNS(namespace, name) + + return NodeBuilder(node) + + def commentClass(self, data): + return NodeBuilder(self.dom.createComment(data)) + + def fragmentClass(self): + return NodeBuilder(self.dom.createDocumentFragment()) + + def appendChild(self, node): + self.dom.appendChild(node.element) + + def testSerializer(self, element): + return testSerializer(element) + + def getDocument(self): + return self.dom + + def getFragment(self): + return base.TreeBuilder.getFragment(self).element + + def insertText(self, data, parent=None): + data = data + if parent != self: + base.TreeBuilder.insertText(self, data, parent) + else: + # HACK: allow text nodes as children of the document node + if hasattr(self.dom, '_child_node_types'): + # pylint:disable=protected-access + if Node.TEXT_NODE not in self.dom._child_node_types: + self.dom._child_node_types = list(self.dom._child_node_types) + self.dom._child_node_types.append(Node.TEXT_NODE) + self.dom.appendChild(self.dom.createTextNode(data)) + + implementation = DomImplementation + name = None + + def testSerializer(element): + element.normalize() + rv = [] + + def serializeElement(element, indent=0): + if element.nodeType == Node.DOCUMENT_TYPE_NODE: + if element.name: + if element.publicId or element.systemId: + publicId = element.publicId or "" + systemId = element.systemId or "" + rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % + (' ' * indent, element.name, publicId, systemId)) + else: + rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name)) + else: + rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) + elif element.nodeType == Node.DOCUMENT_NODE: + rv.append("#document") + elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: + rv.append("#document-fragment") + elif element.nodeType == Node.COMMENT_NODE: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue)) + elif element.nodeType == Node.TEXT_NODE: + rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) + else: + if (hasattr(element, "namespaceURI") and + element.namespaceURI is not None): + name = "%s %s" % (constants.prefixes[element.namespaceURI], + element.nodeName) + else: + name = element.nodeName + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.hasAttributes(): + attributes = [] + for i in range(len(element.attributes)): + attr = element.attributes.item(i) + name = attr.nodeName + value = attr.value + ns = attr.namespaceURI + if ns: + name = "%s %s" % (constants.prefixes[ns], attr.localName) + else: + name = attr.nodeName + attributes.append((name, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + indent += 2 + for child in element.childNodes: + serializeElement(child, indent) + serializeElement(element, 0) + + return "\n".join(rv) + + return locals() + + +# The actual means to get a module! +getDomModule = moduleFactoryFactory(getDomBuilder) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/etree.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/etree.py new file mode 100644 index 00000000..0dedf441 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/etree.py @@ -0,0 +1,340 @@ +from __future__ import absolute_import, division, unicode_literals +# pylint:disable=protected-access + +from pip._vendor.six import text_type + +import re + +from . import base +from .. import _ihatexml +from .. import constants +from ..constants import namespaces +from .._utils import moduleFactoryFactory + +tag_regexp = re.compile("{([^}]*)}(.*)") + + +def getETreeBuilder(ElementTreeImplementation, fullTree=False): + ElementTree = ElementTreeImplementation + ElementTreeCommentType = ElementTree.Comment("asd").tag + + class Element(base.Node): + def __init__(self, name, namespace=None): + self._name = name + self._namespace = namespace + self._element = ElementTree.Element(self._getETreeTag(name, + namespace)) + if namespace is None: + self.nameTuple = namespaces["html"], self._name + else: + self.nameTuple = self._namespace, self._name + self.parent = None + self._childNodes = [] + self._flags = [] + + def _getETreeTag(self, name, namespace): + if namespace is None: + etree_tag = name + else: + etree_tag = "{%s}%s" % (namespace, name) + return etree_tag + + def _setName(self, name): + self._name = name + self._element.tag = self._getETreeTag(self._name, self._namespace) + + def _getName(self): + return self._name + + name = property(_getName, _setName) + + def _setNamespace(self, namespace): + self._namespace = namespace + self._element.tag = self._getETreeTag(self._name, self._namespace) + + def _getNamespace(self): + return self._namespace + + namespace = property(_getNamespace, _setNamespace) + + def _getAttributes(self): + return self._element.attrib + + def _setAttributes(self, attributes): + # Delete existing attributes first + # XXX - there may be a better way to do this... + for key in list(self._element.attrib.keys()): + del self._element.attrib[key] + for key, value in attributes.items(): + if isinstance(key, tuple): + name = "{%s}%s" % (key[2], key[1]) + else: + name = key + self._element.set(name, value) + + attributes = property(_getAttributes, _setAttributes) + + def _getChildNodes(self): + return self._childNodes + + def _setChildNodes(self, value): + del self._element[:] + self._childNodes = [] + for element in value: + self.insertChild(element) + + childNodes = property(_getChildNodes, _setChildNodes) + + def hasContent(self): + """Return true if the node has children or text""" + return bool(self._element.text or len(self._element)) + + def appendChild(self, node): + self._childNodes.append(node) + self._element.append(node._element) + node.parent = self + + def insertBefore(self, node, refNode): + index = list(self._element).index(refNode._element) + self._element.insert(index, node._element) + node.parent = self + + def removeChild(self, node): + self._childNodes.remove(node) + self._element.remove(node._element) + node.parent = None + + def insertText(self, data, insertBefore=None): + if not(len(self._element)): + if not self._element.text: + self._element.text = "" + self._element.text += data + elif insertBefore is None: + # Insert the text as the tail of the last child element + if not self._element[-1].tail: + self._element[-1].tail = "" + self._element[-1].tail += data + else: + # Insert the text before the specified node + children = list(self._element) + index = children.index(insertBefore._element) + if index > 0: + if not self._element[index - 1].tail: + self._element[index - 1].tail = "" + self._element[index - 1].tail += data + else: + if not self._element.text: + self._element.text = "" + self._element.text += data + + def cloneNode(self): + element = type(self)(self.name, self.namespace) + for name, value in self.attributes.items(): + element.attributes[name] = value + return element + + def reparentChildren(self, newParent): + if newParent.childNodes: + newParent.childNodes[-1]._element.tail += self._element.text + else: + if not newParent._element.text: + newParent._element.text = "" + if self._element.text is not None: + newParent._element.text += self._element.text + self._element.text = "" + base.Node.reparentChildren(self, newParent) + + class Comment(Element): + def __init__(self, data): + # Use the superclass constructor to set all properties on the + # wrapper element + self._element = ElementTree.Comment(data) + self.parent = None + self._childNodes = [] + self._flags = [] + + def _getData(self): + return self._element.text + + def _setData(self, value): + self._element.text = value + + data = property(_getData, _setData) + + class DocumentType(Element): + def __init__(self, name, publicId, systemId): + Element.__init__(self, "<!DOCTYPE>") + self._element.text = name + self.publicId = publicId + self.systemId = systemId + + def _getPublicId(self): + return self._element.get("publicId", "") + + def _setPublicId(self, value): + if value is not None: + self._element.set("publicId", value) + + publicId = property(_getPublicId, _setPublicId) + + def _getSystemId(self): + return self._element.get("systemId", "") + + def _setSystemId(self, value): + if value is not None: + self._element.set("systemId", value) + + systemId = property(_getSystemId, _setSystemId) + + class Document(Element): + def __init__(self): + Element.__init__(self, "DOCUMENT_ROOT") + + class DocumentFragment(Element): + def __init__(self): + Element.__init__(self, "DOCUMENT_FRAGMENT") + + def testSerializer(element): + rv = [] + + def serializeElement(element, indent=0): + if not(hasattr(element, "tag")): + element = element.getroot() + if element.tag == "<!DOCTYPE>": + if element.get("publicId") or element.get("systemId"): + publicId = element.get("publicId") or "" + systemId = element.get("systemId") or "" + rv.append("""<!DOCTYPE %s "%s" "%s">""" % + (element.text, publicId, systemId)) + else: + rv.append("<!DOCTYPE %s>" % (element.text,)) + elif element.tag == "DOCUMENT_ROOT": + rv.append("#document") + if element.text is not None: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + if element.tail is not None: + raise TypeError("Document node cannot have tail") + if hasattr(element, "attrib") and len(element.attrib): + raise TypeError("Document node cannot have attributes") + elif element.tag == ElementTreeCommentType: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) + else: + assert isinstance(element.tag, text_type), \ + "Expected unicode, got %s, %s" % (type(element.tag), element.tag) + nsmatch = tag_regexp.match(element.tag) + + if nsmatch is None: + name = element.tag + else: + ns, name = nsmatch.groups() + prefix = constants.prefixes[ns] + name = "%s %s" % (prefix, name) + rv.append("|%s<%s>" % (' ' * indent, name)) + + if hasattr(element, "attrib"): + attributes = [] + for name, value in element.attrib.items(): + nsmatch = tag_regexp.match(name) + if nsmatch is not None: + ns, name = nsmatch.groups() + prefix = constants.prefixes[ns] + attr_string = "%s %s" % (prefix, name) + else: + attr_string = name + attributes.append((attr_string, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + if element.text: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + indent += 2 + for child in element: + serializeElement(child, indent) + if element.tail: + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + serializeElement(element, 0) + + return "\n".join(rv) + + def tostring(element): # pylint:disable=unused-variable + """Serialize an element and its child nodes to a string""" + rv = [] + filter = _ihatexml.InfosetFilter() + + def serializeElement(element): + if isinstance(element, ElementTree.ElementTree): + element = element.getroot() + + if element.tag == "<!DOCTYPE>": + if element.get("publicId") or element.get("systemId"): + publicId = element.get("publicId") or "" + systemId = element.get("systemId") or "" + rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % + (element.text, publicId, systemId)) + else: + rv.append("<!DOCTYPE %s>" % (element.text,)) + elif element.tag == "DOCUMENT_ROOT": + if element.text is not None: + rv.append(element.text) + if element.tail is not None: + raise TypeError("Document node cannot have tail") + if hasattr(element, "attrib") and len(element.attrib): + raise TypeError("Document node cannot have attributes") + + for child in element: + serializeElement(child) + + elif element.tag == ElementTreeCommentType: + rv.append("<!--%s-->" % (element.text,)) + else: + # This is assumed to be an ordinary element + if not element.attrib: + rv.append("<%s>" % (filter.fromXmlName(element.tag),)) + else: + attr = " ".join(["%s=\"%s\"" % ( + filter.fromXmlName(name), value) + for name, value in element.attrib.items()]) + rv.append("<%s %s>" % (element.tag, attr)) + if element.text: + rv.append(element.text) + + for child in element: + serializeElement(child) + + rv.append("</%s>" % (element.tag,)) + + if element.tail: + rv.append(element.tail) + + serializeElement(element) + + return "".join(rv) + + class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable + documentClass = Document + doctypeClass = DocumentType + elementClass = Element + commentClass = Comment + fragmentClass = DocumentFragment + implementation = ElementTreeImplementation + + def testSerializer(self, element): + return testSerializer(element) + + def getDocument(self): + if fullTree: + return self.document._element + else: + if self.defaultNamespace is not None: + return self.document._element.find( + "{%s}html" % self.defaultNamespace) + else: + return self.document._element.find("html") + + def getFragment(self): + return base.TreeBuilder.getFragment(self)._element + + return locals() + + +getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py new file mode 100644 index 00000000..ca12a99c --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py @@ -0,0 +1,366 @@ +"""Module for supporting the lxml.etree library. The idea here is to use as much +of the native library as possible, without using fragile hacks like custom element +names that break between releases. The downside of this is that we cannot represent +all possible trees; specifically the following are known to cause problems: + +Text or comments as siblings of the root element +Docypes with no name + +When any of these things occur, we emit a DataLossWarning +""" + +from __future__ import absolute_import, division, unicode_literals +# pylint:disable=protected-access + +import warnings +import re +import sys + +from . import base +from ..constants import DataLossWarning +from .. import constants +from . import etree as etree_builders +from .. import _ihatexml + +import lxml.etree as etree + + +fullTree = True +tag_regexp = re.compile("{([^}]*)}(.*)") + +comment_type = etree.Comment("asd").tag + + +class DocumentType(object): + def __init__(self, name, publicId, systemId): + self.name = name + self.publicId = publicId + self.systemId = systemId + + +class Document(object): + def __init__(self): + self._elementTree = None + self._childNodes = [] + + def appendChild(self, element): + self._elementTree.getroot().addnext(element._element) + + def _getChildNodes(self): + return self._childNodes + + childNodes = property(_getChildNodes) + + +def testSerializer(element): + rv = [] + infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) + + def serializeElement(element, indent=0): + if not hasattr(element, "tag"): + if hasattr(element, "getroot"): + # Full tree case + rv.append("#document") + if element.docinfo.internalDTD: + if not (element.docinfo.public_id or + element.docinfo.system_url): + dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name + else: + dtd_str = """<!DOCTYPE %s "%s" "%s">""" % ( + element.docinfo.root_name, + element.docinfo.public_id, + element.docinfo.system_url) + rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) + next_element = element.getroot() + while next_element.getprevious() is not None: + next_element = next_element.getprevious() + while next_element is not None: + serializeElement(next_element, indent + 2) + next_element = next_element.getnext() + elif isinstance(element, str) or isinstance(element, bytes): + # Text in a fragment + assert isinstance(element, str) or sys.version_info[0] == 2 + rv.append("|%s\"%s\"" % (' ' * indent, element)) + else: + # Fragment case + rv.append("#document-fragment") + for next_element in element: + serializeElement(next_element, indent + 2) + elif element.tag == comment_type: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) + if hasattr(element, "tail") and element.tail: + rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) + else: + assert isinstance(element, etree._Element) + nsmatch = etree_builders.tag_regexp.match(element.tag) + if nsmatch is not None: + ns = nsmatch.group(1) + tag = nsmatch.group(2) + prefix = constants.prefixes[ns] + rv.append("|%s<%s %s>" % (' ' * indent, prefix, + infosetFilter.fromXmlName(tag))) + else: + rv.append("|%s<%s>" % (' ' * indent, + infosetFilter.fromXmlName(element.tag))) + + if hasattr(element, "attrib"): + attributes = [] + for name, value in element.attrib.items(): + nsmatch = tag_regexp.match(name) + if nsmatch is not None: + ns, name = nsmatch.groups() + name = infosetFilter.fromXmlName(name) + prefix = constants.prefixes[ns] + attr_string = "%s %s" % (prefix, name) + else: + attr_string = infosetFilter.fromXmlName(name) + attributes.append((attr_string, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + + if element.text: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + indent += 2 + for child in element: + serializeElement(child, indent) + if hasattr(element, "tail") and element.tail: + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + serializeElement(element, 0) + + return "\n".join(rv) + + +def tostring(element): + """Serialize an element and its child nodes to a string""" + rv = [] + + def serializeElement(element): + if not hasattr(element, "tag"): + if element.docinfo.internalDTD: + if element.docinfo.doctype: + dtd_str = element.docinfo.doctype + else: + dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name + rv.append(dtd_str) + serializeElement(element.getroot()) + + elif element.tag == comment_type: + rv.append("<!--%s-->" % (element.text,)) + + else: + # This is assumed to be an ordinary element + if not element.attrib: + rv.append("<%s>" % (element.tag,)) + else: + attr = " ".join(["%s=\"%s\"" % (name, value) + for name, value in element.attrib.items()]) + rv.append("<%s %s>" % (element.tag, attr)) + if element.text: + rv.append(element.text) + + for child in element: + serializeElement(child) + + rv.append("</%s>" % (element.tag,)) + + if hasattr(element, "tail") and element.tail: + rv.append(element.tail) + + serializeElement(element) + + return "".join(rv) + + +class TreeBuilder(base.TreeBuilder): + documentClass = Document + doctypeClass = DocumentType + elementClass = None + commentClass = None + fragmentClass = Document + implementation = etree + + def __init__(self, namespaceHTMLElements, fullTree=False): + builder = etree_builders.getETreeModule(etree, fullTree=fullTree) + infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) + self.namespaceHTMLElements = namespaceHTMLElements + + class Attributes(dict): + def __init__(self, element, value=None): + if value is None: + value = {} + self._element = element + dict.__init__(self, value) # pylint:disable=non-parent-init-called + for key, value in self.items(): + if isinstance(key, tuple): + name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) + else: + name = infosetFilter.coerceAttribute(key) + self._element._element.attrib[name] = value + + def __setitem__(self, key, value): + dict.__setitem__(self, key, value) + if isinstance(key, tuple): + name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) + else: + name = infosetFilter.coerceAttribute(key) + self._element._element.attrib[name] = value + + class Element(builder.Element): + def __init__(self, name, namespace): + name = infosetFilter.coerceElement(name) + builder.Element.__init__(self, name, namespace=namespace) + self._attributes = Attributes(self) + + def _setName(self, name): + self._name = infosetFilter.coerceElement(name) + self._element.tag = self._getETreeTag( + self._name, self._namespace) + + def _getName(self): + return infosetFilter.fromXmlName(self._name) + + name = property(_getName, _setName) + + def _getAttributes(self): + return self._attributes + + def _setAttributes(self, attributes): + self._attributes = Attributes(self, attributes) + + attributes = property(_getAttributes, _setAttributes) + + def insertText(self, data, insertBefore=None): + data = infosetFilter.coerceCharacters(data) + builder.Element.insertText(self, data, insertBefore) + + def appendChild(self, child): + builder.Element.appendChild(self, child) + + class Comment(builder.Comment): + def __init__(self, data): + data = infosetFilter.coerceComment(data) + builder.Comment.__init__(self, data) + + def _setData(self, data): + data = infosetFilter.coerceComment(data) + self._element.text = data + + def _getData(self): + return self._element.text + + data = property(_getData, _setData) + + self.elementClass = Element + self.commentClass = Comment + # self.fragmentClass = builder.DocumentFragment + base.TreeBuilder.__init__(self, namespaceHTMLElements) + + def reset(self): + base.TreeBuilder.reset(self) + self.insertComment = self.insertCommentInitial + self.initial_comments = [] + self.doctype = None + + def testSerializer(self, element): + return testSerializer(element) + + def getDocument(self): + if fullTree: + return self.document._elementTree + else: + return self.document._elementTree.getroot() + + def getFragment(self): + fragment = [] + element = self.openElements[0]._element + if element.text: + fragment.append(element.text) + fragment.extend(list(element)) + if element.tail: + fragment.append(element.tail) + return fragment + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + if not name: + warnings.warn("lxml cannot represent empty doctype", DataLossWarning) + self.doctype = None + else: + coercedName = self.infosetFilter.coerceElement(name) + if coercedName != name: + warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) + + doctype = self.doctypeClass(coercedName, publicId, systemId) + self.doctype = doctype + + def insertCommentInitial(self, data, parent=None): + assert parent is None or parent is self.document + assert self.document._elementTree is None + self.initial_comments.append(data) + + def insertCommentMain(self, data, parent=None): + if (parent == self.document and + self.document._elementTree.getroot()[-1].tag == comment_type): + warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) + super(TreeBuilder, self).insertComment(data, parent) + + def insertRoot(self, token): + # Because of the way libxml2 works, it doesn't seem to be possible to + # alter information like the doctype after the tree has been parsed. + # Therefore we need to use the built-in parser to create our initial + # tree, after which we can add elements like normal + docStr = "" + if self.doctype: + assert self.doctype.name + docStr += "<!DOCTYPE %s" % self.doctype.name + if (self.doctype.publicId is not None or + self.doctype.systemId is not None): + docStr += (' PUBLIC "%s" ' % + (self.infosetFilter.coercePubid(self.doctype.publicId or ""))) + if self.doctype.systemId: + sysid = self.doctype.systemId + if sysid.find("'") >= 0 and sysid.find('"') >= 0: + warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) + sysid = sysid.replace("'", 'U00027') + if sysid.find("'") >= 0: + docStr += '"%s"' % sysid + else: + docStr += "'%s'" % sysid + else: + docStr += "''" + docStr += ">" + if self.doctype.name != token["name"]: + warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) + docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>" + root = etree.fromstring(docStr) + + # Append the initial comments: + for comment_token in self.initial_comments: + comment = self.commentClass(comment_token["data"]) + root.addprevious(comment._element) + + # Create the root document and add the ElementTree to it + self.document = self.documentClass() + self.document._elementTree = root.getroottree() + + # Give the root element the right name + name = token["name"] + namespace = token.get("namespace", self.defaultNamespace) + if namespace is None: + etree_tag = name + else: + etree_tag = "{%s}%s" % (namespace, name) + root.tag = etree_tag + + # Add the root element to the internal child/open data structures + root_element = self.elementClass(name, namespace) + root_element._element = root + self.document._childNodes.append(root_element) + self.openElements.append(root_element) + + # Reset to the default insert comment function + self.insertComment = self.insertCommentMain diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 00000000..9bec2076 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py @@ -0,0 +1,154 @@ +"""A collection of modules for iterating through different kinds of +tree, generating tokens identical to those produced by the tokenizer +module. + +To create a tree walker for a new type of tree, you need to do +implement a tree walker object (called TreeWalker by convention) that +implements a 'serialize' method taking a tree as sole argument and +returning an iterator generating tokens. +""" + +from __future__ import absolute_import, division, unicode_literals + +from .. import constants +from .._utils import default_etree + +__all__ = ["getTreeWalker", "pprint"] + +treeWalkerCache = {} + + +def getTreeWalker(treeType, implementation=None, **kwargs): + """Get a TreeWalker class for various types of tree with built-in support + + :arg str treeType: the name of the tree type required (case-insensitive). + Supported values are: + + * "dom": The xml.dom.minidom DOM implementation + * "etree": A generic walker for tree implementations exposing an + elementtree-like interface (known to work with ElementTree, + cElementTree and lxml.etree). + * "lxml": Optimized walker for lxml.etree + * "genshi": a Genshi stream + + :arg implementation: A module implementing the tree type e.g. + xml.etree.ElementTree or cElementTree (Currently applies to the "etree" + tree type only). + + :arg kwargs: keyword arguments passed to the etree walker--for other + walkers, this has no effect + + :returns: a TreeWalker class + + """ + + treeType = treeType.lower() + if treeType not in treeWalkerCache: + if treeType == "dom": + from . import dom + treeWalkerCache[treeType] = dom.TreeWalker + elif treeType == "genshi": + from . import genshi + treeWalkerCache[treeType] = genshi.TreeWalker + elif treeType == "lxml": + from . import etree_lxml + treeWalkerCache[treeType] = etree_lxml.TreeWalker + elif treeType == "etree": + from . import etree + if implementation is None: + implementation = default_etree + # XXX: NEVER cache here, caching is done in the etree submodule + return etree.getETreeModule(implementation, **kwargs).TreeWalker + return treeWalkerCache.get(treeType) + + +def concatenateCharacterTokens(tokens): + pendingCharacters = [] + for token in tokens: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + pendingCharacters.append(token["data"]) + else: + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + pendingCharacters = [] + yield token + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + + +def pprint(walker): + """Pretty printer for tree walkers + + Takes a TreeWalker instance and pretty prints the output of walking the tree. + + :arg walker: a TreeWalker instance + + """ + output = [] + indent = 0 + for token in concatenateCharacterTokens(walker): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + # tag name + if token["namespace"] and token["namespace"] != constants.namespaces["html"]: + if token["namespace"] in constants.prefixes: + ns = constants.prefixes[token["namespace"]] + else: + ns = token["namespace"] + name = "%s %s" % (ns, token["name"]) + else: + name = token["name"] + output.append("%s<%s>" % (" " * indent, name)) + indent += 2 + # attributes (sorted for consistent ordering) + attrs = token["data"] + for (namespace, localname), value in sorted(attrs.items()): + if namespace: + if namespace in constants.prefixes: + ns = constants.prefixes[namespace] + else: + ns = namespace + name = "%s %s" % (ns, localname) + else: + name = localname + output.append("%s%s=\"%s\"" % (" " * indent, name, value)) + # self-closing + if type == "EmptyTag": + indent -= 2 + + elif type == "EndTag": + indent -= 2 + + elif type == "Comment": + output.append("%s<!-- %s -->" % (" " * indent, token["data"])) + + elif type == "Doctype": + if token["name"]: + if token["publicId"]: + output.append("""%s<!DOCTYPE %s "%s" "%s">""" % + (" " * indent, + token["name"], + token["publicId"], + token["systemId"] if token["systemId"] else "")) + elif token["systemId"]: + output.append("""%s<!DOCTYPE %s "" "%s">""" % + (" " * indent, + token["name"], + token["systemId"])) + else: + output.append("%s<!DOCTYPE %s>" % (" " * indent, + token["name"])) + else: + output.append("%s<!DOCTYPE >" % (" " * indent,)) + + elif type == "Characters": + output.append("%s\"%s\"" % (" " * indent, token["data"])) + + elif type == "SpaceCharacters": + assert False, "concatenateCharacterTokens should have got rid of all Space tokens" + + else: + raise ValueError("Unknown token type, %s" % type) + + return "\n".join(output) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..e33f4ae2 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..a3e9c3c9 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..3e3407de --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..23bc58c5 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..730e09a6 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..918c8cdd --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/base.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/base.py new file mode 100644 index 00000000..80c474c4 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/base.py @@ -0,0 +1,252 @@ +from __future__ import absolute_import, division, unicode_literals + +from xml.dom import Node +from ..constants import namespaces, voidElements, spaceCharacters + +__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", + "TreeWalker", "NonRecursiveTreeWalker"] + +DOCUMENT = Node.DOCUMENT_NODE +DOCTYPE = Node.DOCUMENT_TYPE_NODE +TEXT = Node.TEXT_NODE +ELEMENT = Node.ELEMENT_NODE +COMMENT = Node.COMMENT_NODE +ENTITY = Node.ENTITY_NODE +UNKNOWN = "<#UNKNOWN#>" + +spaceCharacters = "".join(spaceCharacters) + + +class TreeWalker(object): + """Walks a tree yielding tokens + + Tokens are dicts that all have a ``type`` field specifying the type of the + token. + + """ + def __init__(self, tree): + """Creates a TreeWalker + + :arg tree: the tree to walk + + """ + self.tree = tree + + def __iter__(self): + raise NotImplementedError + + def error(self, msg): + """Generates an error token with the given message + + :arg msg: the error message + + :returns: SerializeError token + + """ + return {"type": "SerializeError", "data": msg} + + def emptyTag(self, namespace, name, attrs, hasChildren=False): + """Generates an EmptyTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :arg attrs: the attributes of the element as a dict + + :arg hasChildren: whether or not to yield a SerializationError because + this tag shouldn't have children + + :returns: EmptyTag token + + """ + yield {"type": "EmptyTag", "name": name, + "namespace": namespace, + "data": attrs} + if hasChildren: + yield self.error("Void element has children") + + def startTag(self, namespace, name, attrs): + """Generates a StartTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :arg attrs: the attributes of the element as a dict + + :returns: StartTag token + + """ + return {"type": "StartTag", + "name": name, + "namespace": namespace, + "data": attrs} + + def endTag(self, namespace, name): + """Generates an EndTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :returns: EndTag token + + """ + return {"type": "EndTag", + "name": name, + "namespace": namespace} + + def text(self, data): + """Generates SpaceCharacters and Characters tokens + + Depending on what's in the data, this generates one or more + ``SpaceCharacters`` and ``Characters`` tokens. + + For example: + + >>> from html5lib.treewalkers.base import TreeWalker + >>> # Give it an empty tree just so it instantiates + >>> walker = TreeWalker([]) + >>> list(walker.text('')) + [] + >>> list(walker.text(' ')) + [{u'data': ' ', u'type': u'SpaceCharacters'}] + >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE + [{u'data': ' ', u'type': u'SpaceCharacters'}, + {u'data': u'abc', u'type': u'Characters'}, + {u'data': u' ', u'type': u'SpaceCharacters'}] + + :arg data: the text data + + :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens + + """ + data = data + middle = data.lstrip(spaceCharacters) + left = data[:len(data) - len(middle)] + if left: + yield {"type": "SpaceCharacters", "data": left} + data = middle + middle = data.rstrip(spaceCharacters) + right = data[len(middle):] + if middle: + yield {"type": "Characters", "data": middle} + if right: + yield {"type": "SpaceCharacters", "data": right} + + def comment(self, data): + """Generates a Comment token + + :arg data: the comment + + :returns: Comment token + + """ + return {"type": "Comment", "data": data} + + def doctype(self, name, publicId=None, systemId=None): + """Generates a Doctype token + + :arg name: + + :arg publicId: + + :arg systemId: + + :returns: the Doctype token + + """ + return {"type": "Doctype", + "name": name, + "publicId": publicId, + "systemId": systemId} + + def entity(self, name): + """Generates an Entity token + + :arg name: the entity name + + :returns: an Entity token + + """ + return {"type": "Entity", "name": name} + + def unknown(self, nodeType): + """Handles unknown node types""" + return self.error("Unknown node type: " + nodeType) + + +class NonRecursiveTreeWalker(TreeWalker): + def getNodeDetails(self, node): + raise NotImplementedError + + def getFirstChild(self, node): + raise NotImplementedError + + def getNextSibling(self, node): + raise NotImplementedError + + def getParentNode(self, node): + raise NotImplementedError + + def __iter__(self): + currentNode = self.tree + while currentNode is not None: + details = self.getNodeDetails(currentNode) + type, details = details[0], details[1:] + hasChildren = False + + if type == DOCTYPE: + yield self.doctype(*details) + + elif type == TEXT: + for token in self.text(*details): + yield token + + elif type == ELEMENT: + namespace, name, attributes, hasChildren = details + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + for token in self.emptyTag(namespace, name, attributes, + hasChildren): + yield token + hasChildren = False + else: + yield self.startTag(namespace, name, attributes) + + elif type == COMMENT: + yield self.comment(details[0]) + + elif type == ENTITY: + yield self.entity(details[0]) + + elif type == DOCUMENT: + hasChildren = True + + else: + yield self.unknown(details[0]) + + if hasChildren: + firstChild = self.getFirstChild(currentNode) + else: + firstChild = None + + if firstChild is not None: + currentNode = firstChild + else: + while currentNode is not None: + details = self.getNodeDetails(currentNode) + type, details = details[0], details[1:] + if type == ELEMENT: + namespace, name, attributes, hasChildren = details + if (namespace and namespace != namespaces["html"]) or name not in voidElements: + yield self.endTag(namespace, name) + if self.tree is currentNode: + currentNode = None + break + nextSibling = self.getNextSibling(currentNode) + if nextSibling is not None: + currentNode = nextSibling + break + else: + currentNode = self.getParentNode(currentNode) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/dom.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/dom.py new file mode 100644 index 00000000..b0c89b00 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/dom.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import, division, unicode_literals + +from xml.dom import Node + +from . import base + + +class TreeWalker(base.NonRecursiveTreeWalker): + def getNodeDetails(self, node): + if node.nodeType == Node.DOCUMENT_TYPE_NODE: + return base.DOCTYPE, node.name, node.publicId, node.systemId + + elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + return base.TEXT, node.nodeValue + + elif node.nodeType == Node.ELEMENT_NODE: + attrs = {} + for attr in list(node.attributes.keys()): + attr = node.getAttributeNode(attr) + if attr.namespaceURI: + attrs[(attr.namespaceURI, attr.localName)] = attr.value + else: + attrs[(None, attr.name)] = attr.value + return (base.ELEMENT, node.namespaceURI, node.nodeName, + attrs, node.hasChildNodes()) + + elif node.nodeType == Node.COMMENT_NODE: + return base.COMMENT, node.nodeValue + + elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): + return (base.DOCUMENT,) + + else: + return base.UNKNOWN, node.nodeType + + def getFirstChild(self, node): + return node.firstChild + + def getNextSibling(self, node): + return node.nextSibling + + def getParentNode(self, node): + return node.parentNode diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree.py new file mode 100644 index 00000000..95fc0c17 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree.py @@ -0,0 +1,130 @@ +from __future__ import absolute_import, division, unicode_literals + +from collections import OrderedDict +import re + +from pip._vendor.six import string_types + +from . import base +from .._utils import moduleFactoryFactory + +tag_regexp = re.compile("{([^}]*)}(.*)") + + +def getETreeBuilder(ElementTreeImplementation): + ElementTree = ElementTreeImplementation + ElementTreeCommentType = ElementTree.Comment("asd").tag + + class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable + """Given the particular ElementTree representation, this implementation, + to avoid using recursion, returns "nodes" as tuples with the following + content: + + 1. The current element + + 2. The index of the element relative to its parent + + 3. A stack of ancestor elements + + 4. A flag "text", "tail" or None to indicate if the current node is a + text node; either the text or tail of the current element (1) + """ + def getNodeDetails(self, node): + if isinstance(node, tuple): # It might be the root Element + elt, _, _, flag = node + if flag in ("text", "tail"): + return base.TEXT, getattr(elt, flag) + else: + node = elt + + if not(hasattr(node, "tag")): + node = node.getroot() + + if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): + return (base.DOCUMENT,) + + elif node.tag == "<!DOCTYPE>": + return (base.DOCTYPE, node.text, + node.get("publicId"), node.get("systemId")) + + elif node.tag == ElementTreeCommentType: + return base.COMMENT, node.text + + else: + assert isinstance(node.tag, string_types), type(node.tag) + # This is assumed to be an ordinary element + match = tag_regexp.match(node.tag) + if match: + namespace, tag = match.groups() + else: + namespace = None + tag = node.tag + attrs = OrderedDict() + for name, value in list(node.attrib.items()): + match = tag_regexp.match(name) + if match: + attrs[(match.group(1), match.group(2))] = value + else: + attrs[(None, name)] = value + return (base.ELEMENT, namespace, tag, + attrs, len(node) or node.text) + + def getFirstChild(self, node): + if isinstance(node, tuple): + element, key, parents, flag = node + else: + element, key, parents, flag = node, None, [], None + + if flag in ("text", "tail"): + return None + else: + if element.text: + return element, key, parents, "text" + elif len(element): + parents.append(element) + return element[0], 0, parents, None + else: + return None + + def getNextSibling(self, node): + if isinstance(node, tuple): + element, key, parents, flag = node + else: + return None + + if flag == "text": + if len(element): + parents.append(element) + return element[0], 0, parents, None + else: + return None + else: + if element.tail and flag != "tail": + return element, key, parents, "tail" + elif key < len(parents[-1]) - 1: + return parents[-1][key + 1], key + 1, parents, None + else: + return None + + def getParentNode(self, node): + if isinstance(node, tuple): + element, key, parents, flag = node + else: + return None + + if flag == "text": + if not parents: + return element + else: + return element, key, parents, None + else: + parent = parents.pop() + if not parents: + return parent + else: + assert list(parents[-1]).count(parent) == 1 + return parent, list(parents[-1]).index(parent), parents, None + + return locals() + +getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py new file mode 100644 index 00000000..e81ddf33 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py @@ -0,0 +1,213 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +from lxml import etree +from ..treebuilders.etree import tag_regexp + +from . import base + +from .. import _ihatexml + + +def ensure_str(s): + if s is None: + return None + elif isinstance(s, text_type): + return s + else: + return s.decode("ascii", "strict") + + +class Root(object): + def __init__(self, et): + self.elementtree = et + self.children = [] + + try: + if et.docinfo.internalDTD: + self.children.append(Doctype(self, + ensure_str(et.docinfo.root_name), + ensure_str(et.docinfo.public_id), + ensure_str(et.docinfo.system_url))) + except AttributeError: + pass + + try: + node = et.getroot() + except AttributeError: + node = et + + while node.getprevious() is not None: + node = node.getprevious() + while node is not None: + self.children.append(node) + node = node.getnext() + + self.text = None + self.tail = None + + def __getitem__(self, key): + return self.children[key] + + def getnext(self): + return None + + def __len__(self): + return 1 + + +class Doctype(object): + def __init__(self, root_node, name, public_id, system_id): + self.root_node = root_node + self.name = name + self.public_id = public_id + self.system_id = system_id + + self.text = None + self.tail = None + + def getnext(self): + return self.root_node.children[1] + + +class FragmentRoot(Root): + def __init__(self, children): + self.children = [FragmentWrapper(self, child) for child in children] + self.text = self.tail = None + + def getnext(self): + return None + + +class FragmentWrapper(object): + def __init__(self, fragment_root, obj): + self.root_node = fragment_root + self.obj = obj + if hasattr(self.obj, 'text'): + self.text = ensure_str(self.obj.text) + else: + self.text = None + if hasattr(self.obj, 'tail'): + self.tail = ensure_str(self.obj.tail) + else: + self.tail = None + + def __getattr__(self, name): + return getattr(self.obj, name) + + def getnext(self): + siblings = self.root_node.children + idx = siblings.index(self) + if idx < len(siblings) - 1: + return siblings[idx + 1] + else: + return None + + def __getitem__(self, key): + return self.obj[key] + + def __bool__(self): + return bool(self.obj) + + def getparent(self): + return None + + def __str__(self): + return str(self.obj) + + def __unicode__(self): + return str(self.obj) + + def __len__(self): + return len(self.obj) + + +class TreeWalker(base.NonRecursiveTreeWalker): + def __init__(self, tree): + # pylint:disable=redefined-variable-type + if isinstance(tree, list): + self.fragmentChildren = set(tree) + tree = FragmentRoot(tree) + else: + self.fragmentChildren = set() + tree = Root(tree) + base.NonRecursiveTreeWalker.__init__(self, tree) + self.filter = _ihatexml.InfosetFilter() + + def getNodeDetails(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + return base.TEXT, ensure_str(getattr(node, key)) + + elif isinstance(node, Root): + return (base.DOCUMENT,) + + elif isinstance(node, Doctype): + return base.DOCTYPE, node.name, node.public_id, node.system_id + + elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): + return base.TEXT, ensure_str(node.obj) + + elif node.tag == etree.Comment: + return base.COMMENT, ensure_str(node.text) + + elif node.tag == etree.Entity: + return base.ENTITY, ensure_str(node.text)[1:-1] # strip &; + + else: + # This is assumed to be an ordinary element + match = tag_regexp.match(ensure_str(node.tag)) + if match: + namespace, tag = match.groups() + else: + namespace = None + tag = ensure_str(node.tag) + attrs = {} + for name, value in list(node.attrib.items()): + name = ensure_str(name) + value = ensure_str(value) + match = tag_regexp.match(name) + if match: + attrs[(match.group(1), match.group(2))] = value + else: + attrs[(None, name)] = value + return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), + attrs, len(node) > 0 or node.text) + + def getFirstChild(self, node): + assert not isinstance(node, tuple), "Text nodes have no children" + + assert len(node) or node.text, "Node has no children" + if node.text: + return (node, "text") + else: + return node[0] + + def getNextSibling(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + if key == "text": + # XXX: we cannot use a "bool(node) and node[0] or None" construct here + # because node[0] might evaluate to False if it has no child element + if len(node): + return node[0] + else: + return None + else: # tail + return node.getnext() + + return (node, "tail") if node.tail else node.getnext() + + def getParentNode(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + if key == "text": + return node + # else: fallback to "normal" processing + elif node in self.fragmentChildren: + return None + + return node.getparent() diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py new file mode 100644 index 00000000..7483be27 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import, division, unicode_literals + +from genshi.core import QName +from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT +from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT + +from . import base + +from ..constants import voidElements, namespaces + + +class TreeWalker(base.TreeWalker): + def __iter__(self): + # Buffer the events so we can pass in the following one + previous = None + for event in self.tree: + if previous is not None: + for token in self.tokens(previous, event): + yield token + previous = event + + # Don't forget the final event! + if previous is not None: + for token in self.tokens(previous, None): + yield token + + def tokens(self, event, next): + kind, data, _ = event + if kind == START: + tag, attribs = data + name = tag.localname + namespace = tag.namespace + converted_attribs = {} + for k, v in attribs: + if isinstance(k, QName): + converted_attribs[(k.namespace, k.localname)] = v + else: + converted_attribs[(None, k)] = v + + if namespace == namespaces["html"] and name in voidElements: + for token in self.emptyTag(namespace, name, converted_attribs, + not next or next[0] != END or + next[1] != tag): + yield token + else: + yield self.startTag(namespace, name, converted_attribs) + + elif kind == END: + name = data.localname + namespace = data.namespace + if namespace != namespaces["html"] or name not in voidElements: + yield self.endTag(namespace, name) + + elif kind == COMMENT: + yield self.comment(data) + + elif kind == TEXT: + for token in self.text(data): + yield token + + elif kind == DOCTYPE: + yield self.doctype(*data) + + elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, + START_CDATA, END_CDATA, PI): + pass + + else: + yield self.unknown(kind) |