def extract_html_content(self, html_body, fix_html=True):
    """Parse an HTML string and emit both its markup and extracted text.

    Nothing happens when ``html_body`` is ``None``. Otherwise the markup is
    parsed, handed to ``extract_html_header`` and ``cleaner``, its text is
    extracted, the result is flagged as HTML and the body is emitted.

    :param html_body: The HTML markup to process, or ``None``.
    :param fix_html: Accepted for interface compatibility; not used here.
    :raises ProcessingException: If the markup cannot be parsed.
    """
    if html_body is None:
        return

    try:
        try:
            tree = html.fromstring(html_body)
        except ValueError:
            # Work around encoding declarations by dropping the first match
            # of RE_XML_ENCODING and parsing again.
            # https://stackoverflow.com/questions/3402520
            html_body = self.RE_XML_ENCODING.sub('', html_body, count=1)
            tree = html.fromstring(html_body)
    except (ParserError, ParseError, ValueError):
        raise ProcessingException("HTML could not be parsed.")

    self.extract_html_header(tree)
    self.cleaner(tree)
    plain_text = self.extract_html_text(tree)

    result = self.result
    result.flag(result.FLAG_HTML)
    # The emitted body is the (possibly declaration-stripped) input markup.
    result.emit_html_body(html_body, plain_text)
def ingest(self, file_path):
    """Parse an XML file, render it to HTML via XSLT and emit the result.

    The file size is taken from ``self.result.size`` when that is truthy and
    from the file system otherwise.

    :param file_path: Path of the XML file to ingest.
    :raises ProcessingException: If the file is larger than ``MAX_SIZE`` or
        cannot be parsed.
    """
    size = self.result.size
    if not size:
        size = os.path.getsize(file_path)
    if size > self.MAX_SIZE:
        raise ProcessingException("XML file is too large.")

    try:
        tree = etree.parse(file_path)
    except (ParserError, ParseError):
        raise ProcessingException("XML could not be parsed.")

    # Text is extracted from the source tree, before the XSLT is applied.
    text = self.extract_html_text(tree.getroot())

    to_html = etree.XSLT(self.XSLT)
    rendered = html.tostring(
        to_html(tree),
        encoding='unicode',
        pretty_print=True,
    )

    result = self.result
    result.flag(result.FLAG_HTML)
    result.emit_html_body(rendered, text)
def validate_package(package_path, schema_file, ignore_errors=False):
    """
    Validate the FOMOD installer found inside a package folder.

    :param package_path: The root folder of your package. Should contain a
        "fomod" folder with the installer inside.
    :param schema_file: The path to the schema file, with filename and
        extension.
    :param ignore_errors: If true, the function returns False instead of
        raising ValidationError for an invalid installer.
    :return: True when the installer validates; False when it does not and
        ``ignore_errors`` is true.
    :raises MissingFolderError, MissingFileError: Propagated unchanged.
    :raises ParserError: If an XML parse error occurs.
    :raises ValidationError: If the installer is invalid and
        ``ignore_errors`` is false; the message names the config file.
    """
    try:
        fomod_folder = check_fomod(package_path)
        config_file = check_file(join(package_path, fomod_folder))
        config_path = join(package_path, fomod_folder, config_file)
        # validate_tree is called without ignore_errors, so an invalid tree
        # always surfaces here as ValidationError.
        validate_tree(etree.parse(config_path), schema_file)
    except (MissingFolderError, MissingFileError):
        raise
    except etree.ParseError as e:
        raise ParserError(str(e))
    except ValidationError as e:
        if ignore_errors:
            return False
        # Swap the generic prefix for one that names the actual config file.
        message = str(e)
        generic_prefix = "The Config tree is invalid with error message:\n\n"
        config_name = check_file(join(package_path, check_fomod(package_path)))
        raise ValidationError(
            message.replace(
                generic_prefix,
                config_name + " is invalid with error message:\n\n",
            )
        )
    else:
        return True
def validate_tree(elem_tree, schema_file, ignore_errors=False):
    """
    Validate a FOMOD config tree against an XML schema.

    :param elem_tree: The parsed config xml tree to validate.
    :param schema_file: The path to the schema file, with filename and
        extension.
    :param ignore_errors: If true, the function returns False instead of
        raising ValidationError for an invalid tree.
    :return: True when the tree is valid; False when it is not and
        ``ignore_errors`` is true.
    :raises ParserError: If an XML parse error occurs.
    :raises ValidationError: If the tree is invalid and ``ignore_errors``
        is false.
    """
    try:
        schema = etree.XMLSchema(etree.parse(schema_file))
        schema.assertValid(elem_tree)
    except etree.ParseError as e:
        raise ParserError(str(e))
    except etree.DocumentInvalid as e:
        if not ignore_errors:
            raise ValidationError(
                "The Config tree is invalid with error message:\n\n" + str(e)
            )
        return False
    return True
def xml_parse_file(self, file_object):
    """Return the root of the XML tree parsed from ``file_object``.

    On a parse error the error is logged, the source lines around the
    reported position are emitted through ``warnings.warn`` (best effort:
    nothing is shown when the input cannot be read a second time), and the
    original exception is re-raised.

    :param file_object: A file name or file-like object accepted by
        ``ET.parse``.
    :return: The root element of the parsed tree.
    :raises ET.ParseError: If the document cannot be parsed.
    """
    def read_source_lines():
        # Re-read the input so the failing region can be displayed. Any
        # failure (non-seekable or closed stream, unreadable path, ...)
        # yields an empty list so that only the context output is lost.
        try:
            if hasattr(file_object, "read"):
                file_object.seek(0)
                data = file_object.read()
            else:
                with open(file_object, "rb") as handle:
                    data = handle.read()
            if isinstance(data, bytes):
                data = data.decode("utf8", "replace")
            return data.splitlines()
        except (AttributeError, TypeError, ValueError, OSError):
            return []

    try:
        return ET.parse(file_object).getroot()
    except ET.ParseError as err:
        self.logger.error(err)
        source_lines = read_source_lines()

        # The position is taken from the error message; \d+ (not \d*) makes
        # sure int() never sees an empty string.
        match = re.search(r"line (\d+), column (\d+)", str(err))
        if match:
            line = int(match.group(1))
            column = int(match.group(2))
            first, last = max(1, line - 5), line + 5
        else:
            # Unknown position: show the whole source and no caret.
            line = column = None
            first, last = 1, len(source_lines)

        # Line numbers are 1-based, matching the numbers in the message.
        for number, text in enumerate(source_lines[first - 1:last], first):
            prefix = "{:<3}: ".format(number)
            warnings.warn(prefix + text)
            if number == line:
                # NOTE(review): treats the reported column as 1-based, as the
                # original caret arithmetic did - confirm for the parser used.
                offset = len(prefix) + max(column - 1, 0)
                warnings.warn(" " * offset + "^")
        raise
def _parse(self, result): """Create an XMLSerializer from an HTML string, if needed.""" content_type = self.request.response.getHeader('Content-Type') if not content_type or not content_type.startswith('text/html'): return try: return getHTMLSerializer(result) except (AttributeError, TypeError, etree.ParseError): return
def action_importXML(self, *arg):
    """Let the user pick a project XML file and import it.

    Opens a file chooser offering a "NativeCAM projects" (*.xml) filter and
    an "All files" filter, starting in the current catalog's projects
    folder. If the user confirms, the chosen file is parsed, passed through
    ``update_features`` and imported, and ``file_changed`` is set. A parse
    error is reported through ``mess_dlg``. The dialog is always destroyed.
    """
    chooser = gtk.FileChooserDialog(
        _("Import project"),
        None,
        gtk.FILE_CHOOSER_ACTION_OPEN,
        (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
         gtk.STOCK_OK, gtk.RESPONSE_OK))
    try:
        projects_filter = gtk.FileFilter()
        projects_filter.set_name(_("NativeCAM projects"))
        projects_filter.add_mime_type("text/xml")
        projects_filter.add_pattern("*.xml")
        chooser.add_filter(projects_filter)

        any_filter = gtk.FileFilter()
        any_filter.set_name(_("All files"))
        any_filter.add_pattern("*")
        chooser.add_filter(any_filter)

        start_dir = os.path.join(NCAM_DIR, CATALOGS_DIR, self.catalog_dir,
                                 PROJECTS_DIR)
        chooser.set_current_folder(start_dir)
        chooser.set_keep_above(True)
        chooser.set_transient_for(self.get_toplevel())

        # Anything other than OK means there is nothing to import.
        if chooser.run() != gtk.RESPONSE_OK:
            return

        fname = chooser.get_filename()
        try:
            root = etree.parse(fname).getroot()
            self.import_xml(self.update_features(root))
            self.file_changed = True
        except etree.ParseError as err:
            mess_dlg(err, _("Import project"))
    finally:
        chooser.destroy()

# will update with new features version and keep the previous values