diff options
Diffstat (limited to '.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers')
12 files changed, 861 insertions, 0 deletions
diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py new file mode 100644 index 00000000..9bec2076 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py @@ -0,0 +1,154 @@ +"""A collection of modules for iterating through different kinds of +tree, generating tokens identical to those produced by the tokenizer +module. + +To create a tree walker for a new type of tree, you need to do +implement a tree walker object (called TreeWalker by convention) that +implements a 'serialize' method taking a tree as sole argument and +returning an iterator generating tokens. +""" + +from __future__ import absolute_import, division, unicode_literals + +from .. import constants +from .._utils import default_etree + +__all__ = ["getTreeWalker", "pprint"] + +treeWalkerCache = {} + + +def getTreeWalker(treeType, implementation=None, **kwargs): + """Get a TreeWalker class for various types of tree with built-in support + + :arg str treeType: the name of the tree type required (case-insensitive). + Supported values are: + + * "dom": The xml.dom.minidom DOM implementation + * "etree": A generic walker for tree implementations exposing an + elementtree-like interface (known to work with ElementTree, + cElementTree and lxml.etree). + * "lxml": Optimized walker for lxml.etree + * "genshi": a Genshi stream + + :arg implementation: A module implementing the tree type e.g. + xml.etree.ElementTree or cElementTree (Currently applies to the "etree" + tree type only). + + :arg kwargs: keyword arguments passed to the etree walker--for other + walkers, this has no effect + + :returns: a TreeWalker class + + """ + + treeType = treeType.lower() + if treeType not in treeWalkerCache: + if treeType == "dom": + from . import dom + treeWalkerCache[treeType] = dom.TreeWalker + elif treeType == "genshi": + from . import genshi + treeWalkerCache[treeType] = genshi.TreeWalker + elif treeType == "lxml": + from . import etree_lxml + treeWalkerCache[treeType] = etree_lxml.TreeWalker + elif treeType == "etree": + from . import etree + if implementation is None: + implementation = default_etree + # XXX: NEVER cache here, caching is done in the etree submodule + return etree.getETreeModule(implementation, **kwargs).TreeWalker + return treeWalkerCache.get(treeType) + + +def concatenateCharacterTokens(tokens): + pendingCharacters = [] + for token in tokens: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + pendingCharacters.append(token["data"]) + else: + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + pendingCharacters = [] + yield token + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + + +def pprint(walker): + """Pretty printer for tree walkers + + Takes a TreeWalker instance and pretty prints the output of walking the tree. + + :arg walker: a TreeWalker instance + + """ + output = [] + indent = 0 + for token in concatenateCharacterTokens(walker): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + # tag name + if token["namespace"] and token["namespace"] != constants.namespaces["html"]: + if token["namespace"] in constants.prefixes: + ns = constants.prefixes[token["namespace"]] + else: + ns = token["namespace"] + name = "%s %s" % (ns, token["name"]) + else: + name = token["name"] + output.append("%s<%s>" % (" " * indent, name)) + indent += 2 + # attributes (sorted for consistent ordering) + attrs = token["data"] + for (namespace, localname), value in sorted(attrs.items()): + if namespace: + if namespace in constants.prefixes: + ns = constants.prefixes[namespace] + else: + ns = namespace + name = "%s %s" % (ns, localname) + else: + name = localname + output.append("%s%s=\"%s\"" % (" " * indent, name, value)) + # self-closing + if type == "EmptyTag": + indent -= 2 + + elif type == "EndTag": + indent -= 2 + + elif type == "Comment": + output.append("%s<!-- %s -->" % (" " * indent, token["data"])) + + elif type == "Doctype": + if token["name"]: + if token["publicId"]: + output.append("""%s<!DOCTYPE %s "%s" "%s">""" % + (" " * indent, + token["name"], + token["publicId"], + token["systemId"] if token["systemId"] else "")) + elif token["systemId"]: + output.append("""%s<!DOCTYPE %s "" "%s">""" % + (" " * indent, + token["name"], + token["systemId"])) + else: + output.append("%s<!DOCTYPE %s>" % (" " * indent, + token["name"])) + else: + output.append("%s<!DOCTYPE >" % (" " * indent,)) + + elif type == "Characters": + output.append("%s\"%s\"" % (" " * indent, token["data"])) + + elif type == "SpaceCharacters": + assert False, "concatenateCharacterTokens should have got rid of all Space tokens" + + else: + raise ValueError("Unknown token type, %s" % type) + + return "\n".join(output) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..e33f4ae2 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/__init__.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..a3e9c3c9 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/base.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..3e3407de --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/dom.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..23bc58c5 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..730e09a6 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/etree_lxml.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc Binary files differnew file mode 100644 index 00000000..918c8cdd --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/__pycache__/genshi.cpython-37.pyc diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/base.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/base.py new file mode 100644 index 00000000..80c474c4 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/base.py @@ -0,0 +1,252 @@ +from __future__ import absolute_import, division, unicode_literals + +from xml.dom import Node +from ..constants import namespaces, voidElements, spaceCharacters + +__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", + "TreeWalker", "NonRecursiveTreeWalker"] + +DOCUMENT = Node.DOCUMENT_NODE +DOCTYPE = Node.DOCUMENT_TYPE_NODE +TEXT = Node.TEXT_NODE +ELEMENT = Node.ELEMENT_NODE +COMMENT = Node.COMMENT_NODE +ENTITY = Node.ENTITY_NODE +UNKNOWN = "<#UNKNOWN#>" + +spaceCharacters = "".join(spaceCharacters) + + +class TreeWalker(object): + """Walks a tree yielding tokens + + Tokens are dicts that all have a ``type`` field specifying the type of the + token. + + """ + def __init__(self, tree): + """Creates a TreeWalker + + :arg tree: the tree to walk + + """ + self.tree = tree + + def __iter__(self): + raise NotImplementedError + + def error(self, msg): + """Generates an error token with the given message + + :arg msg: the error message + + :returns: SerializeError token + + """ + return {"type": "SerializeError", "data": msg} + + def emptyTag(self, namespace, name, attrs, hasChildren=False): + """Generates an EmptyTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :arg attrs: the attributes of the element as a dict + + :arg hasChildren: whether or not to yield a SerializationError because + this tag shouldn't have children + + :returns: EmptyTag token + + """ + yield {"type": "EmptyTag", "name": name, + "namespace": namespace, + "data": attrs} + if hasChildren: + yield self.error("Void element has children") + + def startTag(self, namespace, name, attrs): + """Generates a StartTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :arg attrs: the attributes of the element as a dict + + :returns: StartTag token + + """ + return {"type": "StartTag", + "name": name, + "namespace": namespace, + "data": attrs} + + def endTag(self, namespace, name): + """Generates an EndTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :returns: EndTag token + + """ + return {"type": "EndTag", + "name": name, + "namespace": namespace} + + def text(self, data): + """Generates SpaceCharacters and Characters tokens + + Depending on what's in the data, this generates one or more + ``SpaceCharacters`` and ``Characters`` tokens. + + For example: + + >>> from html5lib.treewalkers.base import TreeWalker + >>> # Give it an empty tree just so it instantiates + >>> walker = TreeWalker([]) + >>> list(walker.text('')) + [] + >>> list(walker.text(' ')) + [{u'data': ' ', u'type': u'SpaceCharacters'}] + >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE + [{u'data': ' ', u'type': u'SpaceCharacters'}, + {u'data': u'abc', u'type': u'Characters'}, + {u'data': u' ', u'type': u'SpaceCharacters'}] + + :arg data: the text data + + :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens + + """ + data = data + middle = data.lstrip(spaceCharacters) + left = data[:len(data) - len(middle)] + if left: + yield {"type": "SpaceCharacters", "data": left} + data = middle + middle = data.rstrip(spaceCharacters) + right = data[len(middle):] + if middle: + yield {"type": "Characters", "data": middle} + if right: + yield {"type": "SpaceCharacters", "data": right} + + def comment(self, data): + """Generates a Comment token + + :arg data: the comment + + :returns: Comment token + + """ + return {"type": "Comment", "data": data} + + def doctype(self, name, publicId=None, systemId=None): + """Generates a Doctype token + + :arg name: + + :arg publicId: + + :arg systemId: + + :returns: the Doctype token + + """ + return {"type": "Doctype", + "name": name, + "publicId": publicId, + "systemId": systemId} + + def entity(self, name): + """Generates an Entity token + + :arg name: the entity name + + :returns: an Entity token + + """ + return {"type": "Entity", "name": name} + + def unknown(self, nodeType): + """Handles unknown node types""" + return self.error("Unknown node type: " + nodeType) + + +class NonRecursiveTreeWalker(TreeWalker): + def getNodeDetails(self, node): + raise NotImplementedError + + def getFirstChild(self, node): + raise NotImplementedError + + def getNextSibling(self, node): + raise NotImplementedError + + def getParentNode(self, node): + raise NotImplementedError + + def __iter__(self): + currentNode = self.tree + while currentNode is not None: + details = self.getNodeDetails(currentNode) + type, details = details[0], details[1:] + hasChildren = False + + if type == DOCTYPE: + yield self.doctype(*details) + + elif type == TEXT: + for token in self.text(*details): + yield token + + elif type == ELEMENT: + namespace, name, attributes, hasChildren = details + if (not namespace or namespace == namespaces["html"]) and name in voidElements: + for token in self.emptyTag(namespace, name, attributes, + hasChildren): + yield token + hasChildren = False + else: + yield self.startTag(namespace, name, attributes) + + elif type == COMMENT: + yield self.comment(details[0]) + + elif type == ENTITY: + yield self.entity(details[0]) + + elif type == DOCUMENT: + hasChildren = True + + else: + yield self.unknown(details[0]) + + if hasChildren: + firstChild = self.getFirstChild(currentNode) + else: + firstChild = None + + if firstChild is not None: + currentNode = firstChild + else: + while currentNode is not None: + details = self.getNodeDetails(currentNode) + type, details = details[0], details[1:] + if type == ELEMENT: + namespace, name, attributes, hasChildren = details + if (namespace and namespace != namespaces["html"]) or name not in voidElements: + yield self.endTag(namespace, name) + if self.tree is currentNode: + currentNode = None + break + nextSibling = self.getNextSibling(currentNode) + if nextSibling is not None: + currentNode = nextSibling + break + else: + currentNode = self.getParentNode(currentNode) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/dom.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/dom.py new file mode 100644 index 00000000..b0c89b00 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/dom.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import, division, unicode_literals + +from xml.dom import Node + +from . import base + + +class TreeWalker(base.NonRecursiveTreeWalker): + def getNodeDetails(self, node): + if node.nodeType == Node.DOCUMENT_TYPE_NODE: + return base.DOCTYPE, node.name, node.publicId, node.systemId + + elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + return base.TEXT, node.nodeValue + + elif node.nodeType == Node.ELEMENT_NODE: + attrs = {} + for attr in list(node.attributes.keys()): + attr = node.getAttributeNode(attr) + if attr.namespaceURI: + attrs[(attr.namespaceURI, attr.localName)] = attr.value + else: + attrs[(None, attr.name)] = attr.value + return (base.ELEMENT, node.namespaceURI, node.nodeName, + attrs, node.hasChildNodes()) + + elif node.nodeType == Node.COMMENT_NODE: + return base.COMMENT, node.nodeValue + + elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): + return (base.DOCUMENT,) + + else: + return base.UNKNOWN, node.nodeType + + def getFirstChild(self, node): + return node.firstChild + + def getNextSibling(self, node): + return node.nextSibling + + def getParentNode(self, node): + return node.parentNode diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree.py new file mode 100644 index 00000000..95fc0c17 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree.py @@ -0,0 +1,130 @@ +from __future__ import absolute_import, division, unicode_literals + +from collections import OrderedDict +import re + +from pip._vendor.six import string_types + +from . import base +from .._utils import moduleFactoryFactory + +tag_regexp = re.compile("{([^}]*)}(.*)") + + +def getETreeBuilder(ElementTreeImplementation): + ElementTree = ElementTreeImplementation + ElementTreeCommentType = ElementTree.Comment("asd").tag + + class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable + """Given the particular ElementTree representation, this implementation, + to avoid using recursion, returns "nodes" as tuples with the following + content: + + 1. The current element + + 2. The index of the element relative to its parent + + 3. A stack of ancestor elements + + 4. A flag "text", "tail" or None to indicate if the current node is a + text node; either the text or tail of the current element (1) + """ + def getNodeDetails(self, node): + if isinstance(node, tuple): # It might be the root Element + elt, _, _, flag = node + if flag in ("text", "tail"): + return base.TEXT, getattr(elt, flag) + else: + node = elt + + if not(hasattr(node, "tag")): + node = node.getroot() + + if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): + return (base.DOCUMENT,) + + elif node.tag == "<!DOCTYPE>": + return (base.DOCTYPE, node.text, + node.get("publicId"), node.get("systemId")) + + elif node.tag == ElementTreeCommentType: + return base.COMMENT, node.text + + else: + assert isinstance(node.tag, string_types), type(node.tag) + # This is assumed to be an ordinary element + match = tag_regexp.match(node.tag) + if match: + namespace, tag = match.groups() + else: + namespace = None + tag = node.tag + attrs = OrderedDict() + for name, value in list(node.attrib.items()): + match = tag_regexp.match(name) + if match: + attrs[(match.group(1), match.group(2))] = value + else: + attrs[(None, name)] = value + return (base.ELEMENT, namespace, tag, + attrs, len(node) or node.text) + + def getFirstChild(self, node): + if isinstance(node, tuple): + element, key, parents, flag = node + else: + element, key, parents, flag = node, None, [], None + + if flag in ("text", "tail"): + return None + else: + if element.text: + return element, key, parents, "text" + elif len(element): + parents.append(element) + return element[0], 0, parents, None + else: + return None + + def getNextSibling(self, node): + if isinstance(node, tuple): + element, key, parents, flag = node + else: + return None + + if flag == "text": + if len(element): + parents.append(element) + return element[0], 0, parents, None + else: + return None + else: + if element.tail and flag != "tail": + return element, key, parents, "tail" + elif key < len(parents[-1]) - 1: + return parents[-1][key + 1], key + 1, parents, None + else: + return None + + def getParentNode(self, node): + if isinstance(node, tuple): + element, key, parents, flag = node + else: + return None + + if flag == "text": + if not parents: + return element + else: + return element, key, parents, None + else: + parent = parents.pop() + if not parents: + return parent + else: + assert list(parents[-1]).count(parent) == 1 + return parent, list(parents[-1]).index(parent), parents, None + + return locals() + +getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py new file mode 100644 index 00000000..e81ddf33 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py @@ -0,0 +1,213 @@ +from __future__ import absolute_import, division, unicode_literals +from pip._vendor.six import text_type + +from lxml import etree +from ..treebuilders.etree import tag_regexp + +from . import base + +from .. import _ihatexml + + +def ensure_str(s): + if s is None: + return None + elif isinstance(s, text_type): + return s + else: + return s.decode("ascii", "strict") + + +class Root(object): + def __init__(self, et): + self.elementtree = et + self.children = [] + + try: + if et.docinfo.internalDTD: + self.children.append(Doctype(self, + ensure_str(et.docinfo.root_name), + ensure_str(et.docinfo.public_id), + ensure_str(et.docinfo.system_url))) + except AttributeError: + pass + + try: + node = et.getroot() + except AttributeError: + node = et + + while node.getprevious() is not None: + node = node.getprevious() + while node is not None: + self.children.append(node) + node = node.getnext() + + self.text = None + self.tail = None + + def __getitem__(self, key): + return self.children[key] + + def getnext(self): + return None + + def __len__(self): + return 1 + + +class Doctype(object): + def __init__(self, root_node, name, public_id, system_id): + self.root_node = root_node + self.name = name + self.public_id = public_id + self.system_id = system_id + + self.text = None + self.tail = None + + def getnext(self): + return self.root_node.children[1] + + +class FragmentRoot(Root): + def __init__(self, children): + self.children = [FragmentWrapper(self, child) for child in children] + self.text = self.tail = None + + def getnext(self): + return None + + +class FragmentWrapper(object): + def __init__(self, fragment_root, obj): + self.root_node = fragment_root + self.obj = obj + if hasattr(self.obj, 'text'): + self.text = ensure_str(self.obj.text) + else: + self.text = None + if hasattr(self.obj, 'tail'): + self.tail = ensure_str(self.obj.tail) + else: + self.tail = None + + def __getattr__(self, name): + return getattr(self.obj, name) + + def getnext(self): + siblings = self.root_node.children + idx = siblings.index(self) + if idx < len(siblings) - 1: + return siblings[idx + 1] + else: + return None + + def __getitem__(self, key): + return self.obj[key] + + def __bool__(self): + return bool(self.obj) + + def getparent(self): + return None + + def __str__(self): + return str(self.obj) + + def __unicode__(self): + return str(self.obj) + + def __len__(self): + return len(self.obj) + + +class TreeWalker(base.NonRecursiveTreeWalker): + def __init__(self, tree): + # pylint:disable=redefined-variable-type + if isinstance(tree, list): + self.fragmentChildren = set(tree) + tree = FragmentRoot(tree) + else: + self.fragmentChildren = set() + tree = Root(tree) + base.NonRecursiveTreeWalker.__init__(self, tree) + self.filter = _ihatexml.InfosetFilter() + + def getNodeDetails(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + return base.TEXT, ensure_str(getattr(node, key)) + + elif isinstance(node, Root): + return (base.DOCUMENT,) + + elif isinstance(node, Doctype): + return base.DOCTYPE, node.name, node.public_id, node.system_id + + elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): + return base.TEXT, ensure_str(node.obj) + + elif node.tag == etree.Comment: + return base.COMMENT, ensure_str(node.text) + + elif node.tag == etree.Entity: + return base.ENTITY, ensure_str(node.text)[1:-1] # strip &; + + else: + # This is assumed to be an ordinary element + match = tag_regexp.match(ensure_str(node.tag)) + if match: + namespace, tag = match.groups() + else: + namespace = None + tag = ensure_str(node.tag) + attrs = {} + for name, value in list(node.attrib.items()): + name = ensure_str(name) + value = ensure_str(value) + match = tag_regexp.match(name) + if match: + attrs[(match.group(1), match.group(2))] = value + else: + attrs[(None, name)] = value + return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), + attrs, len(node) > 0 or node.text) + + def getFirstChild(self, node): + assert not isinstance(node, tuple), "Text nodes have no children" + + assert len(node) or node.text, "Node has no children" + if node.text: + return (node, "text") + else: + return node[0] + + def getNextSibling(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + if key == "text": + # XXX: we cannot use a "bool(node) and node[0] or None" construct here + # because node[0] might evaluate to False if it has no child element + if len(node): + return node[0] + else: + return None + else: # tail + return node.getnext() + + return (node, "tail") if node.tail else node.getnext() + + def getParentNode(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key + if key == "text": + return node + # else: fallback to "normal" processing + elif node in self.fragmentChildren: + return None + + return node.getparent() diff --git a/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py new file mode 100644 index 00000000..7483be27 --- /dev/null +++ b/.emacs.d.back/.python-environments/default/lib/python3.7/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import, division, unicode_literals + +from genshi.core import QName +from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT +from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT + +from . import base + +from ..constants import voidElements, namespaces + + +class TreeWalker(base.TreeWalker): + def __iter__(self): + # Buffer the events so we can pass in the following one + previous = None + for event in self.tree: + if previous is not None: + for token in self.tokens(previous, event): + yield token + previous = event + + # Don't forget the final event! + if previous is not None: + for token in self.tokens(previous, None): + yield token + + def tokens(self, event, next): + kind, data, _ = event + if kind == START: + tag, attribs = data + name = tag.localname + namespace = tag.namespace + converted_attribs = {} + for k, v in attribs: + if isinstance(k, QName): + converted_attribs[(k.namespace, k.localname)] = v + else: + converted_attribs[(None, k)] = v + + if namespace == namespaces["html"] and name in voidElements: + for token in self.emptyTag(namespace, name, converted_attribs, + not next or next[0] != END or + next[1] != tag): + yield token + else: + yield self.startTag(namespace, name, converted_attribs) + + elif kind == END: + name = data.localname + namespace = data.namespace + if namespace != namespaces["html"] or name not in voidElements: + yield self.endTag(namespace, name) + + elif kind == COMMENT: + yield self.comment(data) + + elif kind == TEXT: + for token in self.text(data): + yield token + + elif kind == DOCTYPE: + yield self.doctype(*data) + + elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, + START_CDATA, END_CDATA, PI): + pass + + else: + yield self.unknown(kind) |