/**
 * Reads the resource at the given URL (any leading BOM stripped), logs the raw
 * text, formats it via {@link XMLDiffFormatter} and returns the formatted
 * output split into individual lines.
 *
 * @param url location of the XML resource to read
 * @return the formatted text, one entry per line
 * @throws IOException if the URL cannot be opened or read
 */
@Override
public List<String> readLines(URL url) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (InputStream in = new BOMInputStream(url.openStream())) {
        byte[] chunk = new byte[8192];
        int read;
        while ((read = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
    }
    String raw = buffer.toString();
    LOGGER.debug(raw);
    String formatted = new XMLDiffFormatter().format(raw);
    return Stream.of(formatted.split("\n")).collect(Collectors.toList());
}
/**
 * Opens the playlist at {@code uri} as a BufferedReader with any BOM stripped.
 * m3u8 and cue playlists are read as UTF-8, everything else as ISO-8859-1.
 *
 * @return a reader for the playlist, or null when a local file is implausibly
 *         large (>= 10 MB) and is skipped
 * @throws IOException if the URL or file cannot be opened
 */
private BufferedReader getBufferedReader() throws IOException {
    String extension;
    if (FileUtil.isUrl(uri)) {
        extension = FileUtil.getUrlExtension(uri);
    } else {
        extension = FileUtil.getExtension(uri);
    }
    // FIX: null-check before lowercasing — the later "extension != null" guard
    // implies the extension can be null, but toLowerCase() would have NPE'd first.
    if (extension != null) {
        extension = extension.toLowerCase(PMS.getLocale());
    }
    Charset charset;
    // FIX: original compared against ".cue" (with a leading dot) while "m3u8"
    // has none; extensions carry no dot, so cue sheets never matched and were
    // wrongly read as ISO-8859-1.
    if (extension != null && (extension.equals("m3u8") || extension.equals("cue"))) {
        charset = StandardCharsets.UTF_8;
    } else {
        charset = StandardCharsets.ISO_8859_1;
    }
    if (FileUtil.isUrl(uri)) {
        return new BufferedReader(new InputStreamReader(new BOMInputStream(new URL(uri).openStream()), charset));
    }
    File playlistfile = new File(uri);
    if (playlistfile.length() < 10000000) { // skip implausibly large playlists
        return new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(playlistfile)), charset));
    }
    return null;
}
/**
 * Get content from {@link java.nio.file.Path} using the file's BOM-declared
 * charset when present, falling back to UTF-8.
 *
 * @param path file to read
 * @return the decoded file content
 * @throws CommandException wrapping any I/O failure
 */
public String getFileContent(Path path) throws CommandException {
    // FIX: the stream was never closed, leaking a file handle on every call;
    // try-with-resources guarantees it is released.
    try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(path.toFile()), false, boms)) {
        if (inputStream.hasBOM()) {
            // Decode with the charset the BOM declares.
            return IOUtils.toString(inputStream, inputStream.getBOMCharsetName());
        }
        return IOUtils.toString(inputStream, StandardCharsets.UTF_8);
    } catch (IOException e) {
        throw new CommandException("Cannot get file content for path: " + path.toString(), e);
    }
}
/**
 * Verifies that BOMInputStream normalizes a BOM-prefixed file: the raw bytes
 * of the BOM and BOM-less variants differ, but after wrapping both in
 * BOMInputStream they are byte-identical.
 */
@Test
public void test() throws IOException {
    ApacheCommonsUtils acu = new ApacheCommonsUtils();
    FileInputStream fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
    FileInputStream fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"));
    // Raw content differs because of the UTF-8 BOM prefix in one file.
    assertNotEquals(acu.toBase64(acu.toByteArray(fis)), acu.toBase64(acu.toByteArray(fisBom)));
    // Re-open both files: the streams above were fully consumed.
    fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
    fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"));
    BOMInputStream bomIS = new BOMInputStream(fis);
    BOMInputStream bomISSkipped = new BOMInputStream(fisBom);
    // With the BOM stripped, both streams must yield identical bytes.
    assertEquals(acu.toBase64(acu.toByteArray(bomIS)), acu.toBase64(acu.toByteArray(bomISSkipped)));
}
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no
 * priorities, last mods, etc.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");
    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);
    // The caller owns 'stream'; we deliberately do not close the reader here.
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), UTF_8));
    int index = 1;
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // Cap the number of accepted URLs at MAX_URLS; skip blank lines.
        if (!line.isEmpty() && index <= MAX_URLS) {
            addUrlIntoSitemap(line, textSiteMap, null, null, null, index++);
        }
    }
    textSiteMap.setProcessed(true);
    return textSiteMap;
}
/** * Decompress the gzipped content and process the resulting XML Sitemap. * * @param url * - URL of the gzipped content * @param response * - Gzipped content * @return the site map * @throws UnknownFormatException * if there is an error parsing the gzip * @throws IOException * if there is an error reading in the gzip {@link java.net.URL} */ protected AbstractSiteMap processGzippedXML(URL url, byte[] response) throws IOException, UnknownFormatException { LOG.debug("Processing gzipped XML"); InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); LOG.debug("XML url = {}", xmlUrl); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); return processXml(url, in); }
/**
 * Parses a subtitle stream into a list of {@code InputSubtitleLine}s.
 * The stream is wrapped in a BOMInputStream so a leading byte order mark
 * does not confuse the subtitle parser.
 *
 * @param inputFormat subtitle format used to pick the parser
 * @param fileName name passed through to the parser (for diagnostics)
 * @param is raw subtitle content
 * @throws InputTextSubException when the subtitle library reports a fatal parse error
 * @throws IOException if the stream cannot be read
 */
public InputTextSubFile(SubtitleFileType inputFormat, String fileName, InputStream is) throws InputTextSubException, IOException {
    try {
        tto = createFormat(inputFormat).parseFile(fileName, new BOMInputStream(is));
    } catch (FatalParsingException ex) {
        throw new InputTextSubException("Parse error returned by subtitle read library", ex);
    }
    captions = new ArrayList<InputSubtitleLine>(tto.captions.size());
    // Convert each parsed caption into the internal representation,
    // translating start/end times to milliseconds.
    for (Caption caption : tto.captions.values()) {
        InputSubtitleLine line = new InputSubtitleLine();
        line.setContent(caption.content);
        line.setStartTime(new SubtitleFileTimeWrapper(caption.start).getMSeconds());
        line.setEndTime(new SubtitleFileTimeWrapper(caption.end).getMSeconds());
        captions.add(line);
    }
}
/**
 * Opens a CSV file.
 *
 * If the given file ends with "gz", then the file is decompressed before using a {@link GZIPInputStream}.
 *
 * @param importFile
 *            the csv file
 * @return a list reader
 * @throws IOException
 *             on io exception
 */
@SuppressWarnings("resource")
protected CsvListReader openCsvListReader(final File importFile) throws IOException {
    // Open file
    InputStream fileStream = new FileInputStream(importFile);
    // Check for compressed file
    if (importFile.getName().toLowerCase().endsWith(".gz")) {
        fileStream = new GZIPInputStream(fileStream);
    }
    // Guess the encoding
    // 'false' = the BOM bytes are excluded from the stream handed to the reader;
    // all common Unicode BOM variants are recognized.
    final BOMInputStream inputStream = new BOMInputStream(fileStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
    final String charset;
    if (inputStream.hasBOM()) {
        charset = inputStream.getBOMCharsetName();
        log.info("BOM detected. Using {} as encoding", charset);
    } else {
        charset = getDefaultEncoding().toString();
        log.info("No BOM detected. Assuming {} as encoding", charset);
    }
    final Reader reader = new InputStreamReader(inputStream, charset);
    // NOTE: stream intentionally left open (see @SuppressWarnings) — ownership
    // passes to the returned CsvListReader, which the caller must close.
    return new CsvListReader(reader, new CsvPreference.Builder(CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE).skipComments(new CommentMatches("(//|/\\*|#|;).*")).build());
}
/**
 * Simple invocation. Load template and data from a file, run process
 * and return memory model containing results or null if there was a problem.
 * Problems/progress reporting live to given reporter
 * @param templateFile the name of the template file to use
 * @param dataFile the name of the data file to process
 * @param reporter the message reporter
 * @param debug set to true to enable voluminous debug message
 * @param allowNullRows set to true to allow output even if some rows don't match
 * @return the converted model, or null when processing reported failure
 * @throws IOException
 */
public Model simpleConvert(String templateFile, String dataFile, ProgressMonitorReporter reporter, boolean debug, boolean allowNullRows) throws IOException {
    Template template = TemplateFactory.templateFrom(templateFile, dc);
    File dataFileF = new File(dataFile);
    String filename = dataFileF.getName();
    String filebasename = NameUtils.removeExtension(filename);
    // Expose file-name metadata to the conversion environment.
    put(ConverterProcess.FILE_NAME, filename);
    put(ConverterProcess.FILE_BASE_NAME, filebasename);
    // BOMInputStream strips a leading byte order mark before parsing.
    // NOTE(review): the stream is not closed here — presumably ConverterProcess
    // takes ownership and closes it; confirm.
    InputStream is = new BOMInputStream( new FileInputStream(dataFileF) );
    ConverterProcess process = new ConverterProcess(dc, is);
    process.setDebug(debug);
    process.setTemplate( template );
    process.setMessageReporter( reporter );
    process.setAllowNullRows(allowNullRows);
    boolean ok = process.process();
    return ok ? process.getModel() : null;
}
/**
 * Builds a single Jena model by aggregating several classpath RDF resources.
 *
 * @param aggr resource names ("name.extension") to load from the classpath
 * @param lang the RDF serialization language passed to {@code Model.read}
 * @return the aggregated model
 * @throws IOException if a resource is missing or cannot be read
 */
public static Model generateAggregateModel(String[] aggr, String lang) throws IOException {
    // FIX: javadoc previously documented non-existent parameters (out, type)
    // and omitted 'lang'.
    Model model = ModelFactory.createDefaultModel();
    Model subModel = ModelFactory.createDefaultModel();
    for (String resource : aggr) {
        InputStream in = (JsonldJenaUtils.class).getClassLoader().getResourceAsStream(resource);
        if (in == null) {
            // FIX: fail with a clear message instead of an NPE when the
            // resource is not on the classpath.
            throw new IOException("Resource not found on classpath: " + resource);
        }
        // FIX: try-with-resources so the stream is closed even if read() throws
        // (closing the BOM wrapper also closes the underlying stream).
        try (BOMInputStream bIn = new BOMInputStream(in, false)) {
            subModel.read(bIn, null, lang);
            model = model.add(subModel);
            subModel.removeAll();
        }
    }
    return model;
}
/** * Decompress the gzipped content and process the resulting XML Sitemap. * * @param url - URL of the gzipped content * @param response - Gzipped content * @throws MalformedURLException * @throws IOException * @throws UnknownFormatException */ private AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException, UnknownFormatException { logger.debug("Processing gzip"); AbstractSiteMap smi; InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); logger.debug("XML url = " + xmlUrl); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); smi = processXml(url, in); decompressed.close(); return smi; }
/**
 * Peeks at the stream for a Unicode byte order mark and returns a short
 * " w/ <BOM description>" suffix, or the empty string when none is present.
 * The detected BOM is also logged at INFO level.
 */
private String showByteOfMark(InputStream source) throws IOException {
    ByteOrderMark mark = new BOMInputStream(source).getBOM();
    if (mark == null) {
        return "";
    }
    String description = mark.toString();
    FileType.logger.log(Level.INFO, "BOM: {0}", description);
    return " w/ " + description;
}
/**
 * Opens the given file as a stream with any common Unicode BOM
 * (UTF-8/16/32, either endianness) transparently skipped.
 *
 * @throws IllegalStateException when the file does not exist
 */
private static InputStream streamFile(File file) {
    ByteOrderMark[] knownMarks = {
        ByteOrderMark.UTF_8,
        ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
        ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
    };
    try {
        return new BOMInputStream(new FileInputStream(file), knownMarks);
    } catch (FileNotFoundException e) {
        throw new IllegalStateException("File not found: " + file.getAbsolutePath(), e);
    }
}
/**
 * Builds a CSVParser over the given file using the first character of
 * {@code delimiter} as separator. The input is decoded as UTF-8 with any
 * BOM stripped; the first record is treated as the header.
 */
private CSVParser createCsvParser(String inputFileName, String delimiter) throws IOException {
    InputStreamReader source = new InputStreamReader(new BOMInputStream(new FileInputStream(inputFileName)), Charsets.UTF_8);
    BufferedReader reader = new BufferedReader(source);
    CSVFormat format = CSVFormat.newFormat(delimiter.charAt(0))
            .withSkipHeaderRecord()
            .withIgnoreEmptyLines()
            .withAllowMissingColumnNames()
            .withQuote('"')
            .withHeader();
    return new CSVParser(reader, format);
}
/**
 * Parses subtitle contents (ASS or SRT, selected by file extension), applies
 * the given styles, and prepares the caption iterator.
 *
 * @param fileName subtitle file name; its extension picks the parser
 * @param fileContents full subtitle text, encoded here as UTF-8 for the parser
 * @param stylesStr style definitions applied to all captions
 */
public SubtitleFile(String fileName, String fileContents, String stylesStr) throws IOException, FatalParsingException {
    LOGGER.debug("Parsing subtitle file {}", fileName);
    TimedTextFileFormat timedTextFormat;
    switch (FilenameUtils.getExtension(fileName)) {
        case "ass":
            timedTextFormat = new FormatASS();
            break;
        case "srt":
            timedTextFormat = new FormatSRT();
            break;
        default:
            LOGGER.error("invalid subtitle file extension file: {}", fileName);
            throw new UnexpectedError();
    }
    // Convert String to InputStream to match subtitleFile API
    byte[] byteData = fileContents.getBytes("UTF-8");
    // Must use BOMInputStream otherwise files with BOM will broke :(((
    // => http://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker
    try (BOMInputStream inputStream = new BOMInputStream(new ByteArrayInputStream(byteData))) {
        timedText = timedTextFormat.parseFile(fileName, inputStream, StandardCharsets.UTF_8);
    }
    // The parser always prepends a fixed banner to 'warnings'; anything longer
    // than the banner means real warnings were produced.
    if (timedText.warnings.length() > "List of non fatal errors produced during parsing:\n\n".length()) {
        LOGGER.warn("There was some warnings during parsing. See logs.");
        LOGGER.debug("Got warnings: {}", "\n" + timedText.warnings);
    }
    styles = parseStyles(stylesStr);
    timedText.styling = styles;
    timedText.description = JIJIMAKU_SIGNATURE;
    annotationCaptions = new TreeMap<>();
    // Initialization: add jijimaku mark and set style to Default
    addJijimakuMark();
    timedText.captions.values().stream().forEach(c -> c.style = styles.get("Default"));
    captionIter = timedText.captions.entrySet().iterator();
}
/**
 * Writes the content into a file using same format as source file: if the
 * source has a BOM, the same BOM and charset are reproduced in the output;
 * otherwise the content is written as UTF-8.
 *
 * @param content content to be written
 * @param path path to the file
 * @param sourceFileMatch source file whose encoding/BOM is mirrored
 * @throws CommandException wrapping any I/O failure
 */
public void writeFileContent(String content, Path path, FileMatch sourceFileMatch) throws CommandException {
    // FIX: the source stream was never closed, leaking a file handle per call;
    // try-with-resources guarantees release.
    try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(sourceFileMatch.getPath().toFile()), false, boms)) {
        File outputFile = path.toFile();
        if (inputStream.hasBOM()) {
            // Write the BOM bytes first, then append the content encoded with
            // the BOM-declared charset.
            FileUtils.writeByteArrayToFile(outputFile, inputStream.getBOM().getBytes());
            FileUtils.writeByteArrayToFile(outputFile, content.getBytes(inputStream.getBOMCharsetName()), true);
        } else {
            FileUtils.writeStringToFile(outputFile, content, StandardCharsets.UTF_8);
        }
    } catch (IOException e) {
        throw new CommandException("Cannot write file content in path: " + path.toString(), e);
    }
}
/**
 * Read the first bytes of the given file and try to determine the file
 * format. Read up to 100 KB before giving up.
 * @param f the file to read
 * @return the file format (or <code>null</code> if the format
 * could not be determined)
 * @throws IOException if the input stream could not be read
 */
public static String detect(File f) throws IOException {
    if (!f.exists()) {
        return null;
    }
    // Strip any BOM before sniffing, then buffer for efficient look-ahead.
    InputStream bomFree = new BOMInputStream(new FileInputStream(f));
    try (BufferedInputStream bis = new BufferedInputStream(bomFree)) {
        return determineFileFormat(bis);
    }
}
private Path saveFile(MultipartFile file) throws IOException { // Get the filename and build the local file path String filename = file.getOriginalFilename(); String directory = System.getProperty("java.io.tmpdir"); String filepath = Paths.get(directory, filename).toString(); // Save the file locally try (BufferedOutputStream stream = new BufferedOutputStream(new FileOutputStream(new File(filepath))); BOMInputStream bis = new BOMInputStream(file.getInputStream(), false)) { IOUtils.copy(bis, stream); } return Paths.get(filepath); }
/**
 * Wraps a downloader so that a leading BOM is detected and stripped.
 * When a BOM is found, its charset overrides {@code defaultEncoding} and the
 * reported byte count is reduced by the BOM's length; otherwise the default
 * encoding and the original byte count are used.
 */
public FileDownloaderWithBOM(WebzInputStreamDownloader downloader, String defaultEncoding) throws IOException, WebzException {
    // FIX: removed the redundant (BOMInputStream) cast — the constructor
    // already yields that type.
    this.bomIn = new BOMInputStream(downloader.getInputStream(), false, ALL_BOMS);
    this.downloader = new FileDownloader(downloader.getFileSpecific(), bomIn);
    ByteOrderMark bom = bomIn.getBOM();
    if (bom == null) {
        actualEncoding = defaultEncoding;
        actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes();
    } else {
        actualEncoding = bom.getCharsetName();
        // The BOM bytes are stripped from the stream, so exclude them from the count.
        actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes() - bom.length();
    }
    reader = new InputStreamReader(bomIn, actualEncoding);
}
public static InputStreamReader getInputStreamReader(File file, String encoding) throws IOException { FileInputStream fis = new FileInputStream(file); logger.debug("Reading file: " + file + " using encoding: " + encoding); BOMInputStream bis = new BOMInputStream(fis); //So that we can remove the BOM return new InputStreamReader(bis, encoding); }
/**
 * Read the provided meta descriptor (e.g. meta.xml) and return a {@link Archive}.
 * @param metaDescriptor
 * @throws SAXException
 * @throws IOException
 * @throws UnsupportedArchiveException
 * @return a new {@link Archive}, never null
 */
public static Archive fromMetaDescriptor(InputStream metaDescriptor) throws SAXException, IOException, UnsupportedArchiveException {
    Archive archive = new Archive();
    MetaXMLSaxHandler handler = new MetaXMLSaxHandler(archive);
    // Strip a possible BOM so the SAX parser sees clean XML.
    try (BOMInputStream bomInputStream = new BOMInputStream(metaDescriptor)) {
        SAX_FACTORY.newSAXParser().parse(bomInputStream, handler);
    } catch (ParserConfigurationException e) {
        // Surface parser-configuration problems through the declared SAXException.
        throw new SAXException(e);
    }
    return archive;
}
/**
 * Reads the given zip entry as the container's mime-type declaration,
 * stores it in {@code mimeType}, and registers the entry as an ASiC document.
 *
 * @throws TechnicalException wrapping any I/O failure while reading the entry
 */
private void extractMimeType(ZipEntry entry) {
    try {
        InputStream zipFileInputStream = getZipEntryInputStream(entry);
        // Strip a possible BOM so the mime-type string starts clean.
        // NOTE(review): bomInputStream is never closed here — if the zip entry
        // stream needs explicit closing this leaks; confirm ownership.
        BOMInputStream bomInputStream = new BOMInputStream(zipFileInputStream);
        DSSDocument document = new InMemoryDocument(bomInputStream);
        mimeType = StringUtils.trim(IOUtils.toString(getDocumentBytes(document), "UTF-8"));
        extractAsicEntry(entry, document);
    } catch (IOException e) {
        logger.error("Error parsing container mime type: " + e.getMessage());
        throw new TechnicalException("Error parsing container mime type: " + e.getMessage(), e);
    }
}
/** * Tries to fetch POM model from maven central for a given dependency * @param dependency dependency to fetch model to * @return POM model if found and valid * @throws IOException * @throws XmlPullParserException */ private static Model fetchModel(RawDependency dependency) throws IOException, XmlPullParserException { // Get the url to the POM file for this artifact String url = "http://central.maven.org/maven2/" + dependency.groupID.replace('.', '/') + '/' + dependency.artifactID + '/' + dependency.version + '/' + dependency.artifactID + '-' + dependency.version + ".pom"; InputStream input = new BOMInputStream(new URL(url).openStream()); MavenXpp3Reader xpp3Reader = new MavenXpp3Reader(); Model model = xpp3Reader.read(input); input.close(); return model; }
/**
 * Verifies that CSVParser reads a BOM-prefixed CSV correctly when the input
 * is wrapped in a BOMInputStream: the "Date" header column must resolve for
 * every record (a leaked BOM would corrupt the first header name).
 */
@Test
public void testBOMInputStream_ParserWithInputStream() throws IOException {
    try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
            final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
        for (final CSVRecord record : parser) {
            final String string = record.get("Date");
            Assert.assertNotNull(string);
            // System.out.println("date: " + record.get("Date"));
        }
    }
}
/**
 * Maps the byte order mark detected by the given stream to its Charset.
 * Defaults to UTF-8 when no BOM (or an unrecognized one) is present.
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    ByteOrderMark bom = bis.getBOM();
    if (ByteOrderMark.UTF_16LE.equals(bom)) {
        return StandardCharsets.UTF_16LE;
    }
    if (ByteOrderMark.UTF_16BE.equals(bom)) {
        return StandardCharsets.UTF_16BE;
    }
    // A UTF-8 BOM and "no BOM at all" both resolve to UTF-8.
    return StandardCharsets.UTF_8;
}
/**
 * Imports a semicolon-delimited CSV file into the given database.
 *
 * @param file the CSV file to import (UTF-8, optional BOM)
 * @param seq sequence helper retained for use during the import
 * @param database target database
 */
public void run(File file, Seq seq, IDatabase database) throws Exception {
    this.seq = seq;
    this.database = database;
    // quote char '"', delimiter ';', Unix line endings
    CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
    try (FileInputStream fis = new FileInputStream(file);
            // exclude the byte order mark, if there is any
            BOMInputStream bom = new BOMInputStream(fis, false, ByteOrderMark.UTF_8);
            InputStreamReader reader = new InputStreamReader(bom, "utf-8");
            BufferedReader buffer = new BufferedReader(reader);
            CsvListReader csvReader = new CsvListReader(buffer, pref)) {
        importFile(csvReader, database);
    }
}
private static CsvListReader createReader(InputStream stream) throws Exception { CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build(); // exclude the byte order mark, if there is any BOMInputStream bom = new BOMInputStream(stream, false, ByteOrderMark.UTF_8); InputStreamReader reader = new InputStreamReader(bom, "utf-8"); BufferedReader buffer = new BufferedReader(reader); CsvListReader csvReader = new CsvListReader(buffer, pref); return csvReader; }
/**
 * Detects any BOM at the start of the byte array and returns the
 * corresponding charset name, or null when no BOM is present or the
 * detection fails.
 */
private static String getCharsetFromBOM(final byte[] byteData) {
    BOMInputStream bomIn = new BOMInputStream(new ByteArrayInputStream(byteData));
    try {
        ByteOrderMark bom = bomIn.getBOM();
        return bom == null ? null : bom.getCharsetName();
    } catch (IOException e) {
        // Detection failure is treated the same as "no BOM".
        return null;
    }
}
/**
 * Reads the first line of the given file and splits it into field names using
 * the CSV meta's delimiter/enclosure/encoding (all environment-substituted).
 *
 * @param fileName file whose header line is read
 * @param csvInputMeta CSV settings (delimiter, enclosure, encoding, escape)
 * @return the trimmed field names from the header line
 * @throws KettleException when the file cannot be read
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );
  try ( FileObject fileObject = KettleVFS.getFileObject( fileName, getTransMeta() );
      // strip UTF-8/UTF-16 BOMs so they do not end up inside the first field name
      BOMInputStream inputStream =
          new BOMInputStream( KettleVFS.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE ) ) {
    InputStreamReader reader = null;
    if ( Utils.isEmpty( realEncoding ) ) {
      // No explicit encoding configured: use the platform default.
      reader = new InputStreamReader( inputStream );
    } else {
      reader = new InputStreamReader( inputStream, realEncoding );
    }
    EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
    // Only the header line is read from the file.
    String line =
        TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) );
    String[] fieldNames = CsvInput.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );
    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new KettleFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no priorities, last mods, etc.
 *
 * @param content raw sitemap bytes (optional BOM is stripped)
 * @param sitemapUrl URL the sitemap was fetched from
 * @return the parsed site map
 * @throws IOException
 */
private SiteMap processText(byte[] content, String sitemapUrl) throws IOException {
    logger.debug("Processing textual Sitemap");
    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);
    // BOMInputStream removes a leading byte order mark before line parsing.
    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs));
    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        // Only the first MAX_URLS non-empty lines are considered.
        if (line.length() > 0 && i <= MAX_URLS) {
            try {
                URL url = new URL(line);
                boolean valid = urlIsLegal(textSiteMap.getBaseUrl(), url.toString());
                // In non-strict mode out-of-scope URLs are still added (marked invalid).
                if (valid || !strict) {
                    if (logger.isDebugEnabled()) {
                        StringBuffer sb = new StringBuffer(" ");
                        sb.append(i).append(". ").append(url);
                        logger.debug(sb.toString());
                    }
                    i++;
                    SiteMapURL surl = new SiteMapURL(url, valid);
                    textSiteMap.addSiteMapUrl(surl);
                }
            } catch (MalformedURLException e) {
                logger.debug("Bad URL [" + line + "].");
            }
        }
    }
    textSiteMap.setProcessed(true);
    return textSiteMap;
}
/**
 * Loads the given file into the text area, decoding with the charset declared
 * by a BOM when present, and promotes the file in the recent-files list.
 * I/O errors are reported to the user rather than thrown.
 */
private void loadFile(File file) {
    try (FileInputStream inputStream = new FileInputStream(file)) {
        // 'false': a detected BOM is stripped from the stream, not passed through.
        BOMInputStream bomIn = new BOMInputStream(inputStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
        String charsetName;
        if (bomIn.hasBOM()) {
            bom = bomIn.getBOM();
            charsetName = bom.getCharsetName();
        } else {
            // No BOM found
            // NOTE(review): charsetName stays null here — presumably
            // StringTools.readStream falls back to a default charset; confirm.
            bom = null;
            charsetName = null;
        }
        String fileContents = StringTools.readStream(bomIn, charsetName);
        textArea.setText(fileContents);
        currentFile = file;
        updateTitle();
        // Move the file to the most-recent position in the recent-files list.
        if(recentFiles.contains(file.getAbsolutePath())) {
            recentFiles.remove(file.getAbsolutePath());
        }
        recentFiles.add(file.getAbsolutePath());
        localStorage.saveProperty("recentFiles", recentFiles);
        updateRecentFilesMenu();
    } catch (IOException e) {
        Tools.showError(e);
    }
}
/**
 * Builds a reader for the given file (or stdin when the name denotes it),
 * honoring a BOM-declared charset when no explicit encoding was requested.
 */
private InputStreamReader getInputStreamReader(String filename, String encoding) throws IOException {
    // Explicit encoding wins; otherwise fall back to the platform default,
    // possibly replaced below by a BOM-detected charset.
    String charsetName = encoding != null ? encoding : Charset.defaultCharset().name();
    InputStream is = System.in;
    if (!isStdIn(filename)) {
        is = new FileInputStream(new File(filename));
        // NOTE(review): 'true' means the BOM bytes are KEPT in the stream, so
        // the decoded text will begin with U+FEFF — confirm downstream expects that.
        BOMInputStream bomIn = new BOMInputStream(is, true, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE,ByteOrderMark.UTF_32LE);
        if (bomIn.hasBOM() && encoding == null) {
            // No explicit encoding: trust the BOM's charset.
            charsetName = bomIn.getBOMCharsetName();
        }
        is = bomIn;
    }
    return new InputStreamReader(new BufferedInputStream(is), charsetName);
}
/**
 * Opens the connection's stream, manually following up to 5 HTTP(S)
 * redirects. Redirects to non-HTTP(S) protocols and https-to-http downgrades
 * are rejected. Returns the stream together with its content type/encoding.
 *
 * @throws SecurityException on an illegal or excessive redirect
 * @throws IOException if the stream cannot be opened
 */
private TypedInputStream openConnectionCheckRedirects(URLConnection c) throws IOException {
    boolean redir;
    int redirects = 0;
    InputStream in = null;
    String contentType = null;
    String contentEncoding = null;
    do {
        if (c instanceof HttpURLConnection) {
            // Redirects are validated manually below, so disable auto-follow.
            ((HttpURLConnection) c).setInstanceFollowRedirects(false);
        }
        // We want to open the input stream before getting headers
        // because getHeaderField() et al swallow IOExceptions.
        in = new BufferedInputStream(new BOMInputStream(c.getInputStream()));
        contentType = c.getContentType();
        contentEncoding = c.getContentEncoding();
        redir = false;
        if (c instanceof HttpURLConnection) {
            HttpURLConnection http = (HttpURLConnection) c;
            int stat = http.getResponseCode();
            // 3xx redirect statuses, excluding 306 (unused) and 304 (not modified).
            if (stat >= 300 && stat <= 307 && stat != 306 && stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
                URL base = http.getURL();
                String loc = http.getHeaderField("Location");
                URL target = null;
                if (loc != null) {
                    target = new URL(base, loc);
                }
                http.disconnect();
                // Redirection should be allowed only for HTTP and HTTPS
                // and should be limited to 5 redirections at most.
                if (target == null || !(target.getProtocol().equals("http") || target.getProtocol().equals("https")) || c.getURL().getProtocol().equals("https") && target.getProtocol().equals("http") || redirects >= 5) {
                    throw new SecurityException("illegal URL redirect");
                }
                redir = true;
                c = target.openConnection();
                redirects++;
            }
        }
    } while (redir);
    // Default content type when the server supplied none.
    if(contentType==null) {
        contentType = "text/plain";
    }
    return new TypedInputStream(in, contentType, contentEncoding);
}
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride, String[] colNamesOverride, boolean hasHeaderRow) { try { // use apache commons io + csv to load but convert to list of String[] // byte-order markers are handled if present at start of file. FileInputStream fis = new FileInputStream(fileName); final Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8"); CSVFormat format; if ( formatType==null ) { format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180; } else { switch ( formatType.toLowerCase() ) { case "tsv": format = hasHeaderRow ? CSVFormat.TDF.withHeader() : CSVFormat.TDF; break; case "mysql": format = hasHeaderRow ? CSVFormat.MYSQL.withHeader() : CSVFormat.MYSQL; break; case "excel": format = hasHeaderRow ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL; break; case "rfc4180": default: format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180; break; } } final CSVParser parser = new CSVParser(reader, format); List<String[]> rows = new ArrayList<>(); int numHeaderNames = parser.getHeaderMap().size(); try { for (final CSVRecord record : parser) { String[] row = new String[record.size()]; for (int j = 0; j<record.size(); j++) { row[j] = record.get(j); } rows.add(row); } } finally { parser.close(); reader.close(); } VariableType[] actualTypes = computeColTypes(rows, numHeaderNames); Set<String> colNameSet = parser.getHeaderMap().keySet(); String[] colNames = colNameSet.toArray(new String[colNameSet.size()]); if ( colNamesOverride!=null ) { colNames = colNamesOverride; } if ( colTypesOverride!=null ) { actualTypes = colTypesOverride; } return fromStrings(rows, actualTypes, colNames, false); } catch (Exception e) { throw new IllegalArgumentException("Can't open and/or read "+fileName, e); } }
public static DataTable loadCSV(String fileName, VariableType[] colTypes, boolean hasHeaderRow) { int numCols = colTypes.length; try { final FileInputStream fis = new FileInputStream(fileName); final Reader r = new InputStreamReader(new BOMInputStream(fis), "UTF-8"); final BufferedReader bf = new BufferedReader(r); List<int[]> rows = new ArrayList<>(); String line; String[] colNames = null; if ( hasHeaderRow ) { line=bf.readLine(); if ( line!=null ) { line = line.trim(); if ( line.length()>0 ) { colNames = line.split(","); for (int i = 0; i<colNames.length; i++) { colNames[i] = colNames[i].trim(); } } } } int n = 0; while ( (line=bf.readLine())!=null ) { if ( n>0 && n % 10000 == 0 ) System.out.println(n); line = line.trim(); if ( line.length()==0 ) continue; int[] row = new int[numCols]; int comma = line.indexOf(',', 0); int prev = 0; int col = 0; while ( comma>=0 ) { String v = line.substring(prev, comma); row[col] = getValue(colTypes[col], v); prev = comma+1; comma = line.indexOf(',', comma+1); col++; } // grab last element after last comma String lastv = line.substring(prev, line.length()); row[col] = getValue(colTypes[col], lastv); // System.out.println(); rows.add(row); n++; } DataTable data = new DataTable(rows, colTypes, colNames, null); return data; } catch (IOException ioe) { throw new IllegalArgumentException("Can't open and/or read "+fileName, ioe); } }
/**
 * Runs the validation pipeline over an uploaded C-CDA file: MDHT validation
 * always runs; Vocabulary validation runs only when MDHT found no schema
 * errors AND the objective permits it; Content validation runs only when
 * Vocabulary ran and the objective permits it. All results are accumulated
 * and returned.
 *
 * @param validationObjective objective controlling which validators apply
 * @param referenceFileName reference file passed to each validator
 * @param ccdaFile uploaded C-CDA document (BOM stripped before reading)
 * @return accumulated results from every validator that ran
 */
private List<RefCCDAValidationResult> runValidators(String validationObjective, String referenceFileName, MultipartFile ccdaFile) throws SAXException, Exception {
    List<RefCCDAValidationResult> validatorResults = new ArrayList<>();
    InputStream ccdaFileInputStream = null;
    try {
        ccdaFileInputStream = ccdaFile.getInputStream();
        // BOMInputStream strips a leading byte order mark before reading the XML.
        String ccdaFileContents = IOUtils.toString(new BOMInputStream(ccdaFileInputStream));
        List<RefCCDAValidationResult> mdhtResults = doMDHTValidation(validationObjective, referenceFileName, ccdaFileContents);
        if(mdhtResults != null && !mdhtResults.isEmpty()) {
            logger.info("Adding MDHT results");
            validatorResults.addAll(mdhtResults);
        }
        boolean isSchemaErrorInMdhtResults = mdhtResultsHaveSchemaError(mdhtResults);
        boolean isObjectiveAllowingVocabularyValidation = objectiveAllowsVocabularyValidation(validationObjective);
        // Vocabulary (and, nested, Content) validation is gated on a clean
        // schema and an objective that allows it.
        if (!isSchemaErrorInMdhtResults && isObjectiveAllowingVocabularyValidation) {
            List<RefCCDAValidationResult> vocabResults = doVocabularyValidation(validationObjective, referenceFileName, ccdaFileContents);
            if(vocabResults != null && !vocabResults.isEmpty()) {
                logger.info("Adding Vocabulary results");
                validatorResults.addAll(vocabResults);
            }
            if(objectiveAllowsContentValidation(validationObjective)) {
                List<RefCCDAValidationResult> contentResults = doContentValidation(validationObjective, referenceFileName, ccdaFileContents);
                if(contentResults != null && !contentResults.isEmpty()) {
                    logger.info("Adding Content results");
                    validatorResults.addAll(contentResults);
                }
            } else {
                logger.info("Skipping Content validation due to: " + "validationObjective (" + (validationObjective != null ? validationObjective : "null objective") + ") is not relevant or valid for Content validation");
            }
        } else {
            // Build a combined reason message for the skip (objective and/or schema errors).
            String separator = !isObjectiveAllowingVocabularyValidation && isSchemaErrorInMdhtResults ? " and " : "";
            logger.info("Skipping Vocabulary (and thus Content) validation due to: " + (isObjectiveAllowingVocabularyValidation ? "" : "validationObjective POSTed: " + (validationObjective != null ? validationObjective : "null objective") + separator) + (isSchemaErrorInMdhtResults ? "C-CDA Schema error(s) found" : ""));
        }
    } catch (IOException e) {
        throw new RuntimeException("Error getting CCDA contents from provided file", e);
    }finally {
        closeFileInputStream(ccdaFileInputStream);
    }
    return validatorResults;
}
/**
 * The main entry point into an Entity.Loader. Interprets each row of a CSV file within a zip file as a sinle
 * GTFS entity, and loads them into a table.
 *
 * @param zip the zip file from which to read a table
 */
public void loadTable(ZipFile zip) throws IOException {
    ZipEntry entry = zip.getEntry(tableName + ".txt");
    if (entry == null) {
        Enumeration<? extends ZipEntry> entries = zip.entries();
        // check if table is contained within sub-directory
        // NOTE(review): this loop does not break on a match, so when multiple
        // sub-directory copies exist the LAST one found wins — confirm intended.
        while (entries.hasMoreElements()) {
            ZipEntry e = entries.nextElement();
            if (e.getName().endsWith(tableName + ".txt")) {
                entry = e;
                feed.errors.add(new TableInSubdirectoryError(tableName, entry.getName().replace(tableName + ".txt", "")));
            }
        }
        /* This GTFS table did not exist in the zip. */
        if (this.isRequired()) {
            feed.errors.add(new MissingTableError(tableName));
        } else {
            LOG.info("Table {} was missing but it is not required.", tableName);
        }
        if (entry == null) return;
    }
    LOG.info("Loading GTFS table {} from {}", tableName, entry);
    InputStream zis = zip.getInputStream(entry);
    // skip any byte order mark that may be present. Files must be UTF-8,
    // but the GTFS spec says that "files that include the UTF byte order mark are acceptable"
    InputStream bis = new BOMInputStream(zis);
    CsvReader reader = new CsvReader(bis, ',', Charset.forName("UTF8"));
    this.reader = reader;
    boolean hasHeaders = reader.readHeaders();
    if (!hasHeaders) {
        feed.errors.add(new EmptyTableError(tableName));
    }
    while (reader.readRecord()) {
        // reader.getCurrentRecord() is zero-based and does not include the header line, keep our own row count
        if (++row % 500000 == 0) {
            LOG.info("Record number {}", human(row));
        }
        loadOneRow(); // Call subclass method to produce an entity from the current row.
    }
    // No data rows at all also counts as an empty table.
    if (row == 0) {
        feed.errors.add(new EmptyTableError(tableName));
    }
}
/**
 * Creates a BufferedReader over the resource's stream, stripping any leading
 * byte order mark before decoding with the requested encoding.
 */
@Override
public BufferedReader create(Resource resource, String encoding) throws UnsupportedEncodingException, IOException {
    InputStreamReader decoder = new InputStreamReader(new BOMInputStream(resource.getInputStream()), encoding);
    return new BufferedReader(decoder);
}
public static InputStreamReader getInputStreamReader(InputStream is, String encoding) throws IOException { logger.debug("Reading stream: using encoding: " + encoding); BOMInputStream bis = new BOMInputStream(is); //So that we can remove the BOM return new InputStreamReader(bis, encoding); }