我们从Python开源项目中,提取了以下40个代码示例,用于说明如何使用lxml.etree.iselement()。
def tokenize(html, include_hrefs=True):
    """
    Convert HTML into a list of token objects (words with attached tags).

    ``html`` may be an already-parsed element, which is used directly, or
    any other value, which is handed to ``parse_html(html, cleanup=True)``.
    Only the content of the page is tokenized: anything in the head is
    ignored, and the <head> and <body> elements are themselves optional.
    Parsing goes through lxml, which guarantees a valid document (although
    it may guess wrongly when the markup is particularly bad).  <ins> and
    <del> tags are dropped from the document because they would be
    confusing in a diff.

    When ``include_hrefs`` is true, the href attribute of <a> tags is
    emitted as a special kind of diffable token.
    """
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Break the document into text chunks: one per tag, word and end tag ...
    chunks = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # ... and stitch those chunks back together into token objects.
    return fixup_chunks(chunks)
def get_xml_attribute(node, attributename):
    """
    Look up an attribute value on an etree node.

    Problems are never raised; they are appended to the
    ``parser_info['parser_warnings']`` list from the enclosing scope.

    :param node: a node from etree
    :param attributename: a string with the attribute name.
    :returns: the attribute value when it is set and non-empty, else None
    """
    if not etree.iselement(node):
        # something doesn't work here some nodes get through here
        parser_info['parser_warnings'].append(
            'Can not get attributename: "{}" from node "{}", because node is not an element of etree.'.format(attributename, node))
        return None

    value = node.get(attributename)
    if value:
        return value

    # Missing attribute and empty attribute are reported the same way.
    parser_info['parser_warnings'].append(
        'Tried to get attribute: "{}" from element {}.\n '
        'I recieved "{}", maybe the attribute does not exist'.format(attributename, node, value))
    return None


### call
def SetXmlBlob(self, blob):
    """Store ``blob`` as the single XML child of the extendedProperty.

    The extendedProperty is only allowed one child element as an XML blob,
    so any extension elements already held by this object are discarded
    before the new one is added.

    Args:
      blob: str, ElementTree Element or atom.ExtensionElement representing
          the XML blob stored in the extendedProperty.
    """
    # Drop the existing children first; this happens even if the
    # conversion below fails.
    self.extension_elements = []

    if isinstance(blob, atom.ExtensionElement):
        converted = blob
    elif ElementTree.iselement(blob):
        converted = atom._ExtensionElementFromElementTree(blob)
    else:
        converted = atom.ExtensionElementFromString(blob)

    self.extension_elements.append(converted)
def __SendDataPart(data, connection):
    """This method is deprecated, use atom.http._send_data_part

    Sends ``data`` over ``connection`` using ``connection.send``:

    * ``str`` / ``bytes`` are sent unchanged;
    * an ElementTree element is serialized with ``ElementTree.tostring``;
    * an object with a ``read`` method is streamed in 100000-sized reads
      until ``read`` returns an empty result;
    * anything else is converted with ``str()`` and sent.

    Args:
      data: the payload, in one of the forms listed above.
      connection: object exposing a ``send(data)`` method.
    """
    deprecated('call to deprecated function __SendDataPart')
    if isinstance(data, (str, bytes)):
        # bytes is listed explicitly so that a binary payload is not turned
        # into its "b'...'" repr by the str() fallback below.
        # TODO add handling for unicode.
        connection.send(data)
    elif ElementTree.iselement(data):
        connection.send(ElementTree.tostring(data))
    # Check to see if data is a file-like object that has a read method.
    elif hasattr(data, 'read'):
        # Read the file and send it a chunk at a time.
        while True:
            binarydata = data.read(100000)
            # An empty result marks EOF.  Comparing against '' would never
            # match the b'' a binary stream returns and would loop forever.
            if not binarydata:
                break
            connection.send(binarydata)
    else:
        # The data object was not a file.
        # Try to convert to a string and send the data.
        connection.send(str(data))
def CalculateDataLength(data):
    """Attempts to determine the length of the data to send.

    A length is reported for strings (``len(data)``), for ElementTree
    elements (length of their serialized form) and, as a fallback, for
    any other object via ``len(str(data))``.

    Args:
      data: object to measure.

    Returns:
      The computed length, or None when ``data`` is a list or a file-like
      object (anything with a ``read`` attribute), whose length is not
      guessed.
    """
    if isinstance(data, str):
        return len(data)
    if isinstance(data, list):
        return None
    if ElementTree.iselement(data):
        serialized = ElementTree.tostring(data)
        return len(serialized)
    if hasattr(data, 'read'):
        # If this is a file-like object, don't try to guess the length.
        return None
    return len(str(data))
def _xml_obj_from_str(xml_str, dev):
    """Turn ``xml_str`` into an etree element.

    The string is first parsed as XML; if that raises
    ``etree.XMLSyntaxError`` it is passed to ``_build_xml(xml_str, dev)``
    instead.

    :param xml_str: the request text.
    :param dev: passed on to ``_build_xml`` and ``InvalidRequestError``.
    :returns: the resulting etree element.
    :raises InvalidRequestError: when neither route yields an element.
    """
    try:
        parsed = etree.fromstring(xml_str)
    except etree.XMLSyntaxError:
        parsed = _build_xml(xml_str, dev)

    if etree.iselement(parsed):
        return parsed

    # still not XML obj, but should
    raise InvalidRequestError(
        dev,
        err='Invalid request "{req}"'.format(req=xml_str)
    )
def to_etree(doc, encoding=None):
    """Coerce `doc` into an ElementTree object.

    Args:
        doc: A filename, file-like object, or etree object
            (Element/ElementTree).
        encoding: The file encoding; only used when `doc` has to be parsed.

    Returns:
        `doc` itself when ``is_etree(doc)`` is true, a new ElementTree
        wrapping `doc` when it is an element, otherwise the result of
        parsing `doc` with the parser from ``get_xml_parser``.
    """
    if is_etree(doc):
        return doc
    if etree.iselement(doc):
        return etree.ElementTree(doc)
    xml_parser = get_xml_parser(encoding=encoding)
    return etree.parse(doc, parser=xml_parser)
def _send_request(self, request):
    """Send XML data to OpenVAS Manager and get results

    ``request`` is either an etree element, which is written to
    ``self.socket`` as UTF-8, or a string/bytes payload (text is UTF-8
    encoded before sending).  The reply is read in 1024-byte blocks and fed
    to an incremental parser; the parsed root is returned.
    """
    block_size = 1024

    if not etree.iselement(request):
        if isinstance(request, six.text_type):
            request = request.encode("utf-8")
        self.socket.send(request)
    else:
        etree.ElementTree(request).write(self.socket, encoding="utf-8")

    parser = etree.XMLTreeBuilder()
    while True:
        chunk = self.socket.recv(block_size)
        parser.feed(chunk)
        # A read shorter than a full block is taken as the end of the reply.
        if len(chunk) < block_size:
            break
    return parser.close()
def convert(parent, entry):
    """Attach ``entry`` to ``parent`` and return the resulting element.

    :param parent: an etree element, or a string used as the tag of a newly
        created element.
    :param entry: the value to attach.  A dict is delegated to
        ``_convert_dict`` and a list to ``_convert_list`` (their return
        value is passed through unchanged); an etree element is appended as
        a child; anything else becomes the parent's text via ``str()``.
    :returns: for element and scalar entries, ``parent``.  This matters
        when ``parent`` was given as a string, because the element created
        here would otherwise be unreachable for the caller.
    """
    if isinstance(parent, str):
        parent = etree.Element(parent)

    if isinstance(entry, dict):
        return _convert_dict(parent, entry)
    if isinstance(entry, list):
        return _convert_list(parent, entry)

    if etree.iselement(entry):
        parent.append(entry)
    else:
        parent.text = str(entry)
    return parent
def __call__(self, tag, *children, **attrib):
    """Build and return an element named ``tag``.

    The tag is prefixed with ``self._namespace`` unless it already starts
    with ``'{'``.  Keyword arguments are applied through the typemap's
    ``dict`` handler.  Each child is processed as follows: callables are
    called first and their result is used; the handler registered in
    ``self._typemap`` for the child's exact type is preferred; elements
    without such a handler are appended directly; otherwise the first
    handler found along the type's MRO is used.  A truthy handler result is
    itself dispatched through the typemap by its exact type.

    Raises:
        TypeError: if no handler exists for a child or any of its bases.
    """
    lookup = self._typemap.get

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)

    if attrib:
        lookup(dict)(elem, attrib)

    for child in children:
        if callable(child):
            child = child()

        handler = lookup(type(child))
        if handler is None:
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases; the
            # generator is lazy, so the search stops at the first hit.
            handler = next(
                (found for found in map(lookup, type(child).__mro__)
                 if found is not None),
                None,
            )
            if handler is None:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))

        produced = handler(elem, child)
        if produced:
            lookup(type(produced))(elem, produced)

    return elem
def remove_empty_tags(tree):
    """Reset whitespace-only ``text`` to None on ``tree`` and its descendants.

    The tree is modified in place and nothing is returned.  Only ``text``
    is touched; no elements are removed.
    """
    pending = [tree]
    while pending:
        node = pending.pop()
        text = node.text
        if text is not None and not text.strip():
            node.text = None
        # Queue the children that are elements for the same treatment.
        pending.extend(child for child in node if etree.iselement(child))
def jsonify(fun):
    """
    Transforms the XML reply into a JSON.

    Decorator: the wrapped callable is invoked and its result inspected.
    A ``GetReply`` contributes its ``data_xml`` and an ``RPCReply`` its
    ``str()`` form; that XML is parsed with ``jxmlease`` and round-tripped
    through ``json`` so that the return value is made of plain JSON types.

    :raises InvalidXMLReplyError: when no XML could be obtained from the
        result.  The error carries the first positional argument's ``_dev``
        and a dict with the offending object (serialized and truncated to
        1024 characters when it is an etree element, ``str()`` otherwise).
    """
    # Imported locally so the block does not rely on a module-level import.
    import functools

    # Preserve the decorated function's __name__/__doc__ on the wrapper.
    @functools.wraps(fun)
    def _jsonify(*vargs, **kvargs):
        ret = fun(*vargs, **kvargs)
        ret_xml = None
        if isinstance(ret, GetReply):
            ret_xml = ret.data_xml
        elif isinstance(ret, RPCReply):
            ret_xml = str(ret)
        if ret_xml:
            return json.loads(json.dumps(jxmlease.parse(ret_xml)))

        if etree.iselement(ret):
            reply_obj = etree.tostring(ret)[:1024]  # up to 1024 chars
        else:
            reply_obj = str(ret)  # trying this
        err = {
            'msg': 'Invalid XML reply',
            'obj': reply_obj
        }
        raise InvalidXMLReplyError(vargs[0]._dev, err)
    return _jsonify
def is_element(node):
    """Tell whether `node` is an XML element node.

    Thin wrapper that returns the result of ``etree.iselement(node)``.
    """
    return etree.iselement(node)
def test_response_init(response):
    """Check the attributes and dict-style access of the ``response`` fixture."""
    # attributes
    assert response.ok
    # Compare by value: identity against an int literal only works thanks
    # to an interpreter caching detail and triggers a SyntaxWarning.
    assert response.status_code == 200
    assert response.command == "test"
    assert iselement(response.xml)

    # data dict elements
    assert response["@test_id"] == "1234"
    assert response["child"]["@id"] == "1234"
def test_client_send_request(client):
    """A raw ``<describe_auth/>`` request must come back as an etree element."""
    reply = client._send_request("<describe_auth/>")
    assert etree.iselement(reply)
def test_download_report_with_xml_format(self, client, report):
    """Downloading a report returns an element carrying the report's id."""
    report_id = report["@id"]
    response = client.download_report(uuid=report_id)

    assert etree.iselement(response)
    assert response.attrib["id"] == report_id
def preRunMacro(self, obj, parameters):
    """Build the macro XML for ``obj`` and register the macros it contains.

    ``obj`` may be:

    * a string starting with ``'<'`` (with no ``parameters``): parsed as
      XML and used as is;
    * any other string with no ``parameters``: every non-blank line is read
      as ``<macro name> <param> ...`` and becomes one macro of a new
      ``sequence`` element;
    * a string together with ``parameters``: a single macro named ``obj``
      whose parameters are converted to strings with ``recur_map``;
    * an etree element: used as is.

    Every generated macro element receives a fresh ``uuid1`` id.  After
    clearing the previous state, ``self._running_macros`` is rebuilt with
    one ``Macro`` per ``macro`` element found in the XML, keyed by its id.

    :returns: the root XML element describing what is going to run.
    :raises TypeError: if ``obj`` is neither a string nor an etree element.
    """
    self._clearRunMacro()
    xml_root = None
    if isinstance(obj, (str, unicode)):
        if obj.startswith('<') and not parameters:
            xml_root = etree.fromstring(obj)
        else:
            macros = []
            # Truthiness instead of len() so that parameters=None is
            # treated like "no parameters" rather than raising TypeError.
            if not parameters:
                for line in obj.split('\n'):
                    pars = line.split()
                    if not pars:
                        # Blank line (e.g. after a trailing newline): there
                        # is no macro name to read, so skip it.
                        continue
                    macros.append((pars[0], pars[1:]))
            else:
                parameters = recur_map(str, parameters)
                macros.append((obj, parameters))
            xml_root = xml_seq = etree.Element('sequence')
            for macro_name, macro_params in macros:
                xml_macro = self._createMacroXml(macro_name, macro_params)
                xml_macro.set('id', str(uuid.uuid1()))
                xml_seq.append(xml_macro)
    elif etree.iselement(obj):
        xml_root = obj
    else:
        raise TypeError('obj must be a string or a etree.Element')

    self._running_macros = {}
    for macro_xml in xml_root.xpath('//macro'):
        # macro_id rather than id, to avoid shadowing the builtin.
        macro_id, name = macro_xml.get('id'), macro_xml.get('name')
        self._running_macros[macro_id] = Macro(self, name, macro_id, macro_xml)
    return xml_root
def get_xml_attribute(node, attributename, parser_info_out=None):
    """
    Get an attribute value from a node.

    Failures are never raised.  When ``parser_info_out`` is a non-empty
    dict the warning is appended to its ``'parser_warnings'`` list (created
    if missing); otherwise the warning is printed.

    :param node: a node from etree
    :param attributename: a string with the attribute name.
    :param parser_info_out: optional dict collecting parser warnings.
    :returns: the attribute value if it is set and non-empty, else None
    """
    if etree.iselement(node):
        attrib_value = node.get(attributename)
        if attrib_value:
            return attrib_value
        # The node is an element; only the attribute is missing or empty.
        # The same text is used whether the warning is stored or printed.
        message = ('Tried to get attribute: "{}" from element {}.\n '
                   'I recieved "{}", maybe the attribute does not exist'
                   ''.format(attributename, node, attrib_value))
    else:
        # something doesn't work here, some nodes get through here
        message = ('Can not get attributename: "{}" from node "{}", '
                   'because node is not an element of etree.'
                   ''.format(attributename, node))

    if parser_info_out:
        parser_info_out.setdefault('parser_warnings', []).append(message)
    else:
        print(message)
    return None


# TODO this has to be done better. be able to write tags and
# certain attributes of attributes that occur possible more then once.
# HINT: This is not really used anymore. use fleurinpmodifier
def get_shape_elements(self, rootnode, shapetags=_DEFAULT_SHAPES,
                       parent_transform=None, skip_layers=None,
                       accumulate_transform=True):
    """
    Flatten a tree of SVG nodes into a list of (element, transform) tuples.

    <g> and <use> elements are traversed depth-first and hidden elements
    are ignored.

    Args:
        rootnode: The root of the node tree to traverse and flatten.
            This can be the document root, a layer,
            or simply a list of element nodes.
        shapetags: List of shape element tags that can be fetched.
            Default is ('path', 'rect', 'line', 'circle',
            'ellipse', 'polyline', 'polygon').
            Anything else is ignored.
        parent_transform: Transform matrix to add to each node's
            transforms. If None the node's parent transform is used.
        skip_layers: A list of layer names (as regexes) to ignore
        accumulate_transform: Apply parent transform(s) to element node
            if True. Default is True.

    Returns:
        A possibly empty list of 2-tuples consisting of
        SVG element and accumulated transform.
    """
    root_is_element = etree.iselement(rootnode)
    if root_is_element and not self.node_is_visible(rootnode):
        return []

    # When rootnode is a list of possibly non-sibling element nodes, the
    # parent's visibility has to be checked for each node individually.
    check_parent = not root_is_element

    return [
        shape
        for node in rootnode
        for shape in self._get_shape_nodes_recurs(
            node, shapetags, parent_transform, check_parent,
            skip_layers, accumulate_transform)
    ]
def css_to_func(css, flags, css_namespaces, lang):
    """Convert a css selector to an xpath, supporting pseudo elements.

    Returns None for an empty selector.  Otherwise returns a function that
    takes an element, evaluates the compiled xpath on it and returns the
    first match as text (None when there is no match).  With a
    ``first-letter`` pseudo element only the first character is returned;
    with ``'nocase'`` in ``flags`` the result is upper-cased.
    """
    from cssselect import parse, HTMLTranslator
    from cssselect.parser import FunctionalPseudoElement

    # FIXME HACK need lessc to support functional-pseudo-selectors instead
    # of marking as strings and stripping " here.
    if not css:
        return None

    sel = parse(css.strip('" '))[0]
    xpath = HTMLTranslator().selector_to_xpath(sel)

    first_letter = False
    pseudo = sel.pseudo_element
    if pseudo is not None:
        if type(pseudo) == FunctionalPseudoElement:
            if pseudo.name in ('attr', 'first-letter'):
                xpath += '/@' + pseudo.arguments[0].value
            if pseudo.name == 'first-letter':
                first_letter = True
        elif type(pseudo) == unicode:
            if pseudo == 'first-letter':
                first_letter = True

    xp = etree.XPath(xpath, namespaces=css_namespaces)

    def toupper(u):
        """Use icu library for locale sensitive uppercasing (python2)."""
        loc = Locale(lang) if lang else Locale()
        return unicode(UnicodeString(u).toUpper(loc))

    def func(elem):
        matches = xp(elem)
        if not matches:
            return None

        head = matches[0]
        if etree.iselement(head):
            text = etree.tostring(head, encoding='unicode', method="text")
        else:
            text = head

        if first_letter:
            if not text:
                # Nothing to take a first letter from; returned unchanged.
                return text
            text = text[0]

        if flags and 'nocase' in flags:
            return toupper(text)
        return text

    return func