/**
 * Parses the "xy_pista.csv" hint file for the given exercise, keyed by the
 * trimmed "id" column of each record.
 *
 * @param exercise the exercise identifier
 * @param exercise_order the order of the exercise within the set
 * @return map from hint id to that record's column/value map; empty if the file could not be read
 */
private HashMap<String, Map<String, String>> parseHintFile(Integer exercise, Integer exercise_order) {
    final HashMap<String, Map<String, String>> list = new HashMap<>();
    try {
        final URL resource = ResourceHelper.getResource(BuenOjoFileUtils.GAME_RESOURCES_INPUT_DIR,
                this.gamePath, this.setPath, exercise.toString(), exercise_order.toString(), "xy_pista.csv");
        final Charset charset = FileEncodingDetectorHelper.guessEncodingAndGetCharset(resource);
        final CSVFormat csvFormat = CSVFormatHelper.getDefaultCSVFormat();
        // try-with-resources: the original never closed the parser (leaked stream)
        try (CSVParser parser = CSVParser.parse(resource, charset, csvFormat)) {
            for (CSVRecord record : parser) {
                // toMap() already returns Map<String, String>; the cast was redundant
                list.put(record.get("id").trim(), record.toMap());
            }
        }
    } catch (IOException e) {
        log.error("Fail", e);
    }
    return list;
}
/**
 * Iterates every CSV record, converts each to a {@code Row}, and hands it to
 * {@code readRow} with a zero-based index; stops early when readRow returns false.
 *
 * @return always null (rows are delivered via the readRow callback)
 * @throws MintleafException wrapping any IOException from the parser
 */
@Override
public T read() throws MintleafException {
    try {
        final CSVParser parser = getCSVParser();
        int rowIndex = 0;
        for (CSVRecord record : parser) {
            final Row row = createRowInstance(record);
            final boolean keepGoing = readRow(rowIndex, row);
            rowIndex++;
            if (!keepGoing) {
                break;
            }
        }
    } catch (IOException e) {
        throw new MintleafException(e);
    }
    return null;
}
/**
 * Parses the configured CSV resource into a list of column/value maps, one per record.
 *
 * @return the parsed records; empty if the resource could not be read
 */
private ArrayList<Map<String, String>> parse() {
    final ArrayList<Map<String, String>> list = new ArrayList<>();
    try {
        final URL resource = ResourceHelper.getResource(isFromGameResourceInput(), fileName);
        final Charset charset = FileEncodingDetectorHelper.guessEncodingAndGetCharset(resource);
        final CSVFormat csvFormat = CSVFormatHelper.getDefaultCSVFormat();
        // try-with-resources: the original never closed the parser (leaked stream)
        try (CSVParser parser = CSVParser.parse(resource, charset, csvFormat)) {
            for (CSVRecord record : parser) {
                // toMap() already returns Map<String, String>; the cast was redundant
                list.add(record.toMap());
            }
        }
    } catch (IOException e) {
        log.error("Fail", e);
    }
    return list;
}
/**
 * Parses the configured CSV resource into a list of column/value maps, one per record.
 *
 * @return the parsed records; empty if the resource could not be read
 */
private ArrayList<Map<String,String>> parse() {
    ArrayList<Map<String, String>> list = new ArrayList<>();
    try {
        URL resource = ResourceHelper.getResource(isFromGameResourceInput(), fileName);
        Charset charset = FileEncodingDetectorHelper.guessEncodingAndGetCharset(resource);
        CSVFormat csvFormat = CSVFormatHelper.getDefaultCSVFormat();
        // try-with-resources: the original never closed the parser (leaked stream)
        try (CSVParser parser = CSVParser.parse(resource, charset, csvFormat)) {
            for (CSVRecord record : parser) {
                list.add(record.toMap());
            }
        }
    } catch (IOException e) {
        log.error("Fail", e);
    }
    return list;
}
/**
 * Parses "areaDelimitada.csv" for the given exercise and returns its record as
 * a column/value map.
 *
 * @param exercise the exercise identifier
 * @param exercise_order the order of the exercise within the set
 * @return the record's column/value map; empty if the file could not be read
 */
private Map<String,String> parseDelimitedAreaFile(Integer exercise, Integer exercise_order) {
    Map<String, String> list = new HashMap<>();
    try {
        URL resource = ResourceHelper.getResource(isFromGameResourceInput(), this.gamePath, this.setPath,
                exercise.toString(), exercise_order.toString(), "areaDelimitada.csv");
        Charset charset = FileEncodingDetectorHelper.guessEncodingAndGetCharset(resource);
        CSVFormat csvFormat = CSVFormatHelper.getDefaultCSVFormat();
        // try-with-resources: the original never closed the parser (leaked stream)
        try (CSVParser parser = CSVParser.parse(resource, charset, csvFormat)) {
            for (CSVRecord record : parser) {
                // NOTE(review): each iteration replaces the map, so only the LAST record
                // survives — presumably the file holds a single record; confirm
                list = record.toMap();
            }
        }
    } catch (IOException e) {
        log.error("Fail", e);
    }
    return list;
}
/**
 * Builds Tag entities from the parsed tag-pool CSV, de-duplicating by name.
 * Also (re)populates the {@code tagMap} field (name -> Tag) as a side effect;
 * each new tag's number is its 1-based insertion position.
 *
 * @param parser the CSV parser over the tag pool
 * @param course the course each new tag is attached to
 * @return the newly created tags, in first-appearance order
 */
private List<Tag> createTags(CSVParser parser, Course course) {
    tagMap = new HashMap<>();
    ArrayList<Tag> tagList = new ArrayList<>();
    for (CSVRecord csvRecord : parser) {
        // get(int) already returns String; the original's .toString() was redundant
        String name = csvRecord.get(TagPoolColumn.TAG.ordinal());
        if (!tagMap.containsKey(name)) {
            Tag tag = new Tag();
            tag.setName(name);
            tag.setCourse(course);
            tagMap.put(name, tag);
            // number = size after insertion, i.e. 1-based position
            tag.setNumber(tagMap.size());
            tagList.add(tag);
        }
    }
    return tagList;
}
/**
 * Parses the input stream as RFC4180 CSV (first row is the header) into a list
 * of column/value maps, one per record.
 *
 * @return the parsed records
 * @throws BuenOjoCSVParserException if the stream cannot be opened or parsed
 */
public List<Map<String,String>> parse() throws BuenOjoCSVParserException {
    List<Map<String, String>> list = new ArrayList<>();
    // try-with-resources: the original never closed the parser (leaked stream)
    try (CSVParser parser = CSVFormat.RFC4180.withHeader()
            .withDelimiter(',')
            .withAllowMissingColumnNames(true)
            .parse(new InputStreamReader(this.inputStreamSource.getInputStream()))) {
        for (CSVRecord record : parser) {
            list.add(record.toMap());
        }
    } catch (IOException e) {
        // custom exception carries only the message (existing contract)
        throw new BuenOjoCSVParserException(e.getMessage());
    }
    return list;
}
/**
 * Parses sight-pair records (satellite/terrain coordinates plus tolerances)
 * from the input stream.
 *
 * @return the parsed sight pairs, in file order
 * @throws BuenOjoCSVParserException if the file contains no records
 * @throws IOException if the stream cannot be read
 */
public List<PhotoLocationSightPair> parse() throws IOException, BuenOjoCSVParserException {
    final List<CSVRecord> records;
    // try-with-resources: the original never closed the parser (leaked stream)
    try (CSVParser parser = CSVFormat.RFC4180.withHeader().withDelimiter(',')
            .withAllowMissingColumnNames(true)
            .parse(new InputStreamReader(this.inputStreamSource.getInputStream()))) {
        records = parser.getRecords();
    }
    if (records.isEmpty()) {
        throw new BuenOjoCSVParserException("El archivos de miras no contiene registros");
    }
    ArrayList<PhotoLocationSightPair> sightPairs = new ArrayList<>(records.size());
    for (CSVRecord record : records) {
        PhotoLocationSightPair sight = new PhotoLocationSightPair();
        // Integer.valueOf over the deprecated new Integer(String) constructor
        sight.setNumber(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.id)));
        sight.setSatelliteX(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.satCol)));
        sight.setSatelliteY(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.satRow)));
        sight.setSatelliteTolerance(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.satTolerancia)));
        sight.setTerrainX(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.terCol)));
        sight.setTerrainY(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.terRow)));
        sight.setTerrainTolerance(Integer.valueOf(record.get(PhotoLocationSightPairCSVColumn.terTolerancia)));
        sightPairs.add(sight);
    }
    return sightPairs;
}
/**
 * Parses tag-pair records (slot id + tag number) and resolves each tag number
 * against {@code tagList}; pairs with unknown tag numbers are logged and skipped.
 *
 * @return the resolved tag pairs
 * @throws IOException if the stream cannot be read
 */
public List<TagPair> parse() throws IOException {
    ArrayList<TagPair> tagPairs = new ArrayList<>(AVG_ITEMS);
    // try-with-resources: the original never closed the parser (leaked stream)
    try (CSVParser parser = CSVFormat.RFC4180.withHeader().withDelimiter(',')
            .withAllowMissingColumnNames(true)
            .parse(new InputStreamReader(this.inputStreamSource.getInputStream()))) {
        for (CSVRecord record : parser) {
            TagPair pair = new TagPair();
            // Integer.valueOf over the deprecated new Integer(String) constructor
            Integer tagSlotId = Integer.valueOf(record.get("id"));
            Integer tagNumber = Integer.valueOf(record.get("etiqueta"));
            pair.setTagSlotId(tagSlotId);
            Optional<Tag> optionalTag = tagList.stream().filter(isEqualToTagNumber(tagNumber)).findFirst();
            if (optionalTag.isPresent()) {
                pair.setTag(optionalTag.get());
                tagPairs.add(pair);
            } else {
                log.debug("Attempt to get invalid tag with number: " + tagNumber);
            }
        }
    }
    return tagPairs;
}
/**
 * Parses the beacon CSV, which must contain exactly one data record.
 *
 * @return the parsed beacon (x, y, tolerance)
 * @throws BuenOjoCSVParserException if the file has zero or more than one record
 * @throws IOException if the stream cannot be read
 */
public PhotoLocationBeacon parse() throws IOException, BuenOjoCSVParserException {
    final List<CSVRecord> records;
    // try-with-resources: the original never closed the parser (leaked stream)
    try (CSVParser parser = CSVFormat.RFC4180.withHeader().withDelimiter(',')
            .withAllowMissingColumnNames(true)
            .parse(new InputStreamReader(this.inputStreamSource.getInputStream()))) {
        records = parser.getRecords();
    }
    if (records.size() > 1) {
        throw new BuenOjoCSVParserException("El archivo contiene más de un indicador");
    }
    if (records.size() == 0) {
        throw new BuenOjoCSVParserException("El archivo de indicador es inválido");
    }
    CSVRecord record = records.get(0);
    PhotoLocationBeacon beacon = new PhotoLocationBeacon();
    // Integer.valueOf over the deprecated new Integer(String) constructor
    beacon.setX(Integer.valueOf(record.get(PhotoLocationBeaconCSVColumns.col.ordinal())));
    beacon.setY(Integer.valueOf(record.get(PhotoLocationBeaconCSVColumns.row.ordinal())));
    beacon.setTolerance(Integer.valueOf(record.get(PhotoLocationBeaconCSVColumns.tolerance.ordinal())));
    return beacon;
}
/**
 * Parses tag-circle records (id, col, row, radioPx) from the input stream.
 *
 * @return the parsed circles
 * @throws BuenOjoCSVParserException if the file contains more than MAX_CIRCLES records
 * @throws IOException if the stream cannot be read
 */
public List<TagCircle> parse() throws IOException, BuenOjoCSVParserException {
    ArrayList<TagCircle> list = new ArrayList<>(MAX_CIRCLES);
    // try-with-resources: the original never closed the parser (leaked stream)
    try (CSVParser parser = CSVFormat.RFC4180.withHeader().withDelimiter(',')
            .withAllowMissingColumnNames(false)
            .parse(new InputStreamReader(this.inputStream))) {
        for (CSVRecord record : parser) {
            TagCircle circle = new TagCircle();
            // valueOf over the deprecated new Integer/new Float(String) constructors
            circle.setNumber(Integer.valueOf(record.get("id")));
            circle.setX(Integer.valueOf(record.get("col")));
            circle.setY(Integer.valueOf(record.get("row")));
            circle.setRadioPx(Float.valueOf(record.get("radioPx")));
            list.add(circle);
        }
    }
    if (list.size() > MAX_CIRCLES) {
        throw new BuenOjoCSVParserException("el archivo contiene mas de " + MAX_CIRCLES + "áreas circulares");
    }
    return list;
}
/**
 * Creates a new dataset with column labels and data read from the given Reader,
 * using a specified input format.
 *
 * @param reader the Reader to read column labels and data from
 * @param input_format the format
 * @throws RuntimeException wrapping any IOException raised while parsing
 */
@SuppressWarnings("WeakerAccess")
public DataSet(final Reader reader, final CSVFormat input_format) {
    this();
    try (CSVParser parser = new CSVParser(reader, input_format.withHeader())) {
        labels.addAll(getColumnLabels(parser));
        for (final CSVRecord record : parser) {
            final List<String> items = csvRecordToList(record);
            final int size = items.size();
            // Don't add row if the line was empty.
            if (size > 1 || (size == 1 && items.get(0).length() > 0)) {
                records.add(items);
            }
        }
        // removed redundant reader.close(): closing the parser (try-with-resources)
        // closes the underlying reader as well
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Populates the shared target instance of T from the rows of a CSV stream,
 * applying each column value to the corresponding declared field.
 *
 * NOTE(review): the original looped on parser.getRecordNumber() — which is 0
 * before any record has been read, so the loop never executed — and called the
 * stream-consuming parser.getRecords() inside the loop. Records are now
 * fetched exactly once and iterated directly.
 *
 * @param is the CSV input stream
 * @return the target instance after the last row's field values were applied
 * @throws IOException if the stream cannot be read
 */
public T csvToObject(InputStream is) throws IOException, IllegalArgumentException, IllegalAccessException, InstantiationException {
    WrapperReturner<T> list = new WrapperReturner();
    try (InputStreamReader br = new InputStreamReader(is);
         CSVParser parser = new CSVParser(br, CSVFormat.DEFAULT)) {
        for (CSVRecord record : parser.getRecords()) {
            int column = 0;
            for (Field field : classFields) {
                setFieldValue(field, list.t, record.get(column));
                list.tl.add(list.t);
                column++;
            }
        }
    }
    return list.t;
}
/**
 * Compares two CSV files record by record, failing the test if any record
 * differs or if the files have different lengths.
 *
 * @param actualPath path of the file produced by the code under test
 * @param expectedPath path of the expected reference file
 */
private void compareCSVFiles(String actualPath, String expectedPath) {
    try (CSVParser parserTranslatedFile = createCsvParser(actualPath, ",");
         CSVParser parserExpectedFile = createCsvParser(expectedPath, ",")) {
        Iterator<CSVRecord> translatedIterator = parserTranslatedFile.iterator();
        Iterator<CSVRecord> expectedIterator = parserExpectedFile.iterator();
        while (translatedIterator.hasNext() && expectedIterator.hasNext()) {
            CSVRecord translatedRecord = translatedIterator.next();
            CSVRecord expectedRecord = expectedIterator.next();
            compareCSVRecords(translatedRecord, expectedRecord);
        }
        if (translatedIterator.hasNext()) {
            fail("Actual file is longer (has more lines) than expected file");
        }
        if (expectedIterator.hasNext()) {
            fail("Expected file is longer (has more lines) than actual file");
        }
    } catch (Exception e) {
        // include the cause so the failure is diagnosable (original swallowed it)
        fail("Exception while iterating over files: " + e);
    }
}
/** * sauvegarde dans la base de données * @param parser * @return */ public int updateDB(CSVParser parser) { int res = 0; DBManager.connect(); for (CSVRecord item : parser) { String serie = item.get(0).trim(); String mention = item.get(1).trim(); String specialite = item.get(2).trim(); String section = item.get(3).trim(); //enregistrer if (add(serie, mention, specialite, section)){ res++; } } DBManager.quit(); return res; }
/** * sauvegarde dans la base de données * @param parser * @return */ public int updateDB(CSVParser parser) { int res = 0; DBManager.connect(); for (CSVRecord item : parser) { String id = item.get(0).trim(); String serie = item.get(1).trim(); String mention = item.get(2).trim(); String specialite = item.get(3).trim(); String section = item.get(4).trim(); //enregistrer if (add(id, serie, mention, specialite, section)){ res++; } } DBManager.quit(); return res; }
/** * sauvegarde dans la base de données * @param parser * @return */ public int updateDB(CSVParser parser) { int res = 0; DBManager.connect(); for (CSVRecord item : parser) { String id = item.get(0).trim(); String code = item.get(1).trim(); String note = item.get(2).trim(); if (note.isEmpty()) { note=null; } //enregistrer if (add(id, code, note)){ res++; } } DBManager.quit(); return res; }
/** * sauvegarde dans la base de données des foreign keys * @param parser * @return */ public int updateFkDB(CSVParser parser) { int res = 0; DBManager.connect(); for (CSVRecord item : parser) { String id = item.get(0).trim(); String code = item.get(1).trim(); String matiere = item.get(3).trim(); //enregistrer if (!matiere.isEmpty()){ if (addMatiere(id, code, matiere)){ res++; } } } DBManager.quit(); return res; }
/** * sauvegarde dans la base de données des foreign keys * @param parser * @return */ public int updateFkDB(CSVParser parser) { int res = 0; DBManager.connect(); for (CSVRecord item : parser) { String code = item.get(0).trim(); String composition = item.get(2).trim(); String rattrapage = item.get(3).trim(); //enregistrer if (!composition.isEmpty()){ if (addComposition(code, composition)){ res++; } } if (!rattrapage.isEmpty()){ if (addRattrapage(code, rattrapage)){ res++; } } } DBManager.quit(); return res; }
/** * sauvegarde dans la base de données * @param parser * @return */ public int updateDB(CSVParser parser) { int res = 0; DBManager.connect(); for (CSVRecord item : parser) { String code = item.get(0).trim(); String libelle = item.get(1).trim(); String epreuve = item.get(2).trim(); //enregistrer if (add(code, libelle, epreuve)){ res++; } } DBManager.quit(); return res; }
/**
 * Putting this init here so that we can discover the file fields before running the actual rec
 */
public void init() {
    if (!this.initialized) {
        try {
            MutableList<String> fields;
            if (csvVersion == CsvStaticDataReader.CSV_V2) {
                // V2 path: commons-csv parser; the first record is consumed here
                // so that `fields` holds the header row's column names
                CSVFormat csvFormat = getCsvFormat(delim, nullToken);
                this.csvreaderV2 = new CSVParser(reader, csvFormat);
                this.iteratorV2 = csvreaderV2.iterator();
                fields = ListAdapter.adapt(IteratorUtils.toList(iteratorV2.next().iterator()));
            } else {
                // V1 path: legacy opencsv reader; readNext() yields the header line as String[]
                this.csvreaderV1 = new au.com.bytecode.opencsv.CSVReader(this.reader, this.delim);
                fields = ArrayAdapter.adapt(this.csvreaderV1.readNext());
            }
            // normalize the raw header names to DB object names
            this.fields = fields.collect(this.convertDbObjectName);
        } catch (Exception e) {
            throw new DeployerRuntimeException(e);
        }
        // only mark initialized after the header was read successfully
        this.initialized = true;
    }
}
private static String[] getUniqueFields(File inFile) throws IOException { CSVParser parser = new CSVParser(new BufferedReader(new FileReader(inFile)), CSVFormat.EXCEL.withNullString(NULL_STRING)); // first record used as header CSVRecord header = parser.iterator().next(); List<String> uniqueFields = new ArrayList<String>(); for(int i = 0; i < header.size(); i++) { String col = header.get(i); if (!uniqueFields.contains(col)) { // we can add it directly uniqueFields.add(col); } else { // disambiguate by appending index uniqueFields.add(col + "_" + i); } } return uniqueFields.toArray(new String[0]); }
/**
 * Verifies that spreadsheet regions detected from the reference cell CSV match
 * the expected rectangles (order-insensitive via ILL_DEFINED_ORDER sort).
 */
@Test
public void testFindSpreadsheetsFromCells() throws IOException {
    List<Cell> cells = new ArrayList<>();
    // try-with-resources: the original never closed the parser (leaked file handle)
    try (CSVParser parse = org.apache.commons.csv.CSVParser.parse(
            new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"),
            Charset.forName("utf-8"), CSVFormat.DEFAULT)) {
        for (CSVRecord record : parse) {
            cells.add(new Cell(Float.parseFloat(record.get(0)),
                    Float.parseFloat(record.get(1)),
                    Float.parseFloat(record.get(2)),
                    Float.parseFloat(record.get(3))));
        }
    }
    List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES);
    Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER);
    List<Rectangle> foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
    Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER);
    assertTrue(foundRectangles.equals(expected));
}
@Test public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestBasicExtractor-RECTANGLE_TEST_NATURAL_ORDER.csv"), Charset.forName("utf-8"), CSVFormat.DEFAULT); List<Rectangle> rectangles = new ArrayList<>(); for (CSVRecord record : parse) { rectangles.add(new Rectangle(Float.parseFloat(record.get(0)), Float.parseFloat(record.get(1)), Float.parseFloat(record.get(2)), Float.parseFloat(record.get(3)))); } //List<Rectangle> rectangles = Arrays.asList(RECTANGLES_TEST_NATURAL_ORDER); Utils.sort(rectangles, Rectangle.ILL_DEFINED_ORDER); for (int i = 0; i < (rectangles.size() - 1); i++) { Rectangle rectangle = rectangles.get(i); Rectangle nextRectangle = rectangles.get(i + 1); assertTrue(rectangle.compareTo(nextRectangle) < 0); } }
/**
 * Opens the file backing the given input split and initializes the CSV parser
 * and its record iterator over it. If URI generation is enabled, seeds the id
 * generator; otherwise resets the numeric URI counter.
 *
 * @param inSplit the split whose file should be parsed
 * @throws IOException if the file cannot be opened
 * @throws InterruptedException propagated from the file-opening helper
 */
protected void initParser(InputSplit inSplit) throws IOException, InterruptedException {
    fileIn = openFile(inSplit, true);
    if (fileIn == null) {
        // nothing to parse; leave parser state untouched
        return;
    }
    instream = new InputStreamReader(fileIn, encoding);
    bytesRead = 0;
    fileLen = inSplit.getLength();
    if (uriName == null) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (generateId) {
            // seed generated URIs with path + split start so ids are unique per split
            idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
        } else {
            uriId = 0;
        }
    }
    parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, encapsulator, true, true));
    parserIterator = parser.iterator();
}
/** * Imports a Sheet from a CSV file in the specified path. * @param path a CSV File Path. * @return a new Sheet or null if parsing failed */ public Sheet importSheet(String path) { File csvData = new File(path); // Parse the CSV file. CSVParser parser; try { parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180); } catch (IOException e) { return null; } // Create our new sheet. Sheet sheet = new Sheet("Imported Sheet"); // Populate its cells. for (CSVRecord record : parser) { for (int x = 0; x < record.size(); ++x) { sheet.setCellValue(x, (int) record.getRecordNumber() - 1, record.get(x), true); } } return sheet; }
/**
 * Prints the Name and Class columns of the bundled students.csv as an
 * aligned two-column table.
 */
public static void main(String[] args) throws IOException {
    try (Reader reader = new InputStreamReader(
                ExampleStudentCSV.class.getClassLoader().getResourceAsStream("students.csv"));
         CSVParser parser = new CSVParser(reader,
                CSVFormat.EXCEL.withHeader("Name", "Class", "Dorm", "Room", "GPA").withFirstRecordAsHeader())) {
        String divider = StringUtils.leftPad("", 20, '-');
        System.out.printf("%20s | %20s\n", "Name", "Class");
        System.out.printf("%20s-+-%20s\n", divider, divider);
        for (CSVRecord record : parser) {
            System.out.printf("%20s | %20s\n", record.get("Name"), record.get("Class"));
        }
    }
}
/**
 * Downloads the postcode-to-coordinate CSV and builds a postcode -> LatLong
 * lookup map. The first record is treated as a header row and skipped.
 *
 * @param datasetProvider label identifying the dataset provider for the download cache
 * @param downloadUtils helper used to fetch the remote CSV
 * @return map from postcode (column 1) to LatLong built from columns 2 and 3
 */
public static Map<String, LatLong> postcodeToLatLong(String datasetProvider, DownloadUtils downloadUtils) throws Exception {
    Map<String, LatLong> postcodeToCoordMap = new HashMap<>();
    // NOTE(review): platform-default charset — confirm the feed's actual encoding
    InputStreamReader postcodeIsr = new InputStreamReader(
            downloadUtils.fetchInputStream(new URL(POSTCODE_TO_COORDINATE_URL), datasetProvider, ".csv"));
    // try-with-resources: the original leaked the parser and underlying stream
    try (CSVParser csvFileParser = new CSVParser(postcodeIsr, CSVFormat.DEFAULT)) {
        Iterator<CSVRecord> iter = csvFileParser.getRecords().iterator();
        // skip the header row (guarded so an empty file no longer throws)
        if (iter.hasNext()) {
            iter.next();
        }
        while (iter.hasNext()) {
            CSVRecord record = iter.next();
            postcodeToCoordMap.put(record.get(1), new LatLong(record.get(2), record.get(3)));
        }
    }
    return postcodeToCoordMap;
}
/**
 * Resolves the configured file location (URL or local path), downloads it as
 * CSV, and caches all records in {@code csvRecords}.
 *
 * @throws Exception if the location cannot be resolved or the CSV cannot be read
 */
protected void setupUtils() throws Exception {
    CSVFormat format = CSVFormat.DEFAULT;
    String fileLocation = config.getFileLocation();
    URL url;
    try {
        url = new URL(fileLocation);
    } catch (MalformedURLException e) {
        // Not a URL: fall back to treating the location as a local file path.
        File file;
        if (!(file = new File(fileLocation)).exists()) {
            // fixed: the original message lacked the {} placeholder, so the path was never logged
            log.error("File does not exist: {}", fileLocation);
        }
        url = file.toURI().toURL();
    }
    // try-with-resources: the original never closed the parser (leaked stream)
    try (InputStreamReader isr = new InputStreamReader(
                downloadUtils.fetchInputStream(url, getProvider().getLabel(), ".csv"));
         CSVParser csvFileParser = new CSVParser(isr, format)) {
        csvRecords = csvFileParser.getRecords();
    }
}
/**
 * Fetches just the header row of each subject's datasource CSV and builds an
 * Attribute per non-blacklisted column.
 *
 * @param datasourceIdString the datasource identifier used to build the data URL
 * @return the attributes derived from the CSV headers
 */
@Override
public List<Attribute> getTimedValueAttributes(String datasourceIdString) throws Exception {
    List<Attribute> attributes = new ArrayList<>();
    for (SubjectRecipe subjectRecipe : subjectRecipes) {
        // recordlimit=0 fetches only the header row
        String headerRowUrl = getDataUrl(datasourceIdString, subjectRecipe.getSubjectType()) + "&recordlimit=0";
        File headerRowStream = downloadUtils.fetchFile(new URL(headerRowUrl), getProvider().getLabel(), ".csv");
        // try-with-resources: the original leaked a parser (and FileReader) per iteration
        try (CSVParser csvParser = new CSVParser(new FileReader(headerRowStream),
                CSVFormat.RFC4180.withFirstRecordAsHeader())) {
            for (String header : csvParser.getHeaderMap().keySet()) {
                if (!BLACK_LIST_HEADERS.contains(header)) {
                    String attributeLabel = attributeLabelFromHeader(header);
                    attributes.add(new Attribute(getProvider(), attributeLabel, header));
                }
            }
        }
    }
    return attributes;
}
@Test public void testExportsCSV() throws Exception { DataExportSpecificationBuilder csvBuilder = DataExportSpecificationBuilder.withCSVExporter(); csvBuilder .addSubjectSpecification( new SubjectSpecificationBuilder(AbstractONSImporter.PROVIDER.getLabel(), "lsoa").setMatcher("label", "E01002766")) .addDatasourceSpecification("uk.org.tombolo.importer.ons.CensusImporter", "qs103ew", "") .addFieldSpecification( FieldBuilder.fractionOfTotal("percentage_under_1_years_old_label") .addDividendAttribute("uk.gov.ons", "Age: Age under 1") // number under one year old .setDivisorAttribute("uk.gov.ons", "Age: All categories: Age") // total population ); engine.execute(csvBuilder.build(), writer); List<CSVRecord> records = CSVParser.parse(writer.toString(), CSVFormat.DEFAULT.withHeader()).getRecords(); assertEquals(1, records.size()); assertEquals("E01002766", records.get(0).get("label")); assertEquals("0.012263099219620958", records.get(0).get("percentage_under_1_years_old_label")); }
/**
 * Each String in the stream is a CSV file
 * @return stream of parsed insert queries
 */
public Stream<Map<String, Object>> convert() {
    try {
        // Build the parser over the injected reader with the configured
        // separator/quote/null-string; the first record becomes the header.
        CSVParser csvParser = CSVFormat.newFormat(separator)
                .withIgnoreEmptyLines()
                .withEscape('\\')
                .withFirstRecordAsHeader()
                .withQuote(quote)
                .withNullString(nullString)
                .parse(reader);
        // NOTE(review): the parser is intentionally left open — the returned Stream
        // consumes the iterator lazily, so closing here would break downstream
        // iteration. Whoever owns `reader` is responsible for closing it.
        return stream(csvParser.iterator()).map(this::parse);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Opens the bzip2-compressed geolocation feature CSV (UTF-8, header row),
 * prepares the record iterator, and delegates to the processor to begin
 * revision processing. I/O failures are logged, not propagated.
 */
@Override
public void startRevisionProcessing() {
    logger.debug("Starting...");
    try {
        BufferedInputStream rawStream = new BufferedInputStream(new FileInputStream(geolocationFeatureFile));
        BZip2CompressorInputStream decompressed = new BZip2CompressorInputStream(rawStream);
        BufferedReader csvReader = new BufferedReader(new InputStreamReader(decompressed, "UTF-8"));
        csvParser = new CSVParser(csvReader, CSVFormat.RFC4180.withHeader());
        iterator = csvParser.iterator();
        processor.startRevisionProcessing();
    } catch (IOException e) {
        logger.error("", e);
    }
}
/**
 * Initializes the label reader: opens the labels stream as UTF-8 CSV with the
 * expected header, then consumes and validates the first record against
 * FILE_HEADER column by column. On failure, logs and shuts the reader down.
 */
public void startReading() {
    try {
        BufferedReader csvReader = new BufferedReader(
                new InputStreamReader(labelsStream, "UTF-8"), BUFFER_SIZE);
        csvParser = new CSVParser(csvReader, CSVFormat.RFC4180.withHeader(FILE_HEADER));
        iterator = csvParser.iterator();
        // The first record must match the expected header exactly.
        CSVRecord headerRecord = iterator.next();
        int column = 0;
        for (String expectedName : FILE_HEADER) {
            if (!expectedName.equals(headerRecord.get(column))) {
                throw new IOException("The header of the CSV file is wrong.");
            }
            column++;
        }
    } catch (IOException e) {
        logger.error("", e);
        finishReading();
    }
}
/**
 * Reads the processed hate-speech CSV (Cp1252), skips the header row, and
 * writes column 2 of every record NOT classified as inoffensive to the
 * training output file.
 */
public static void main(String[] args) throws IOException {
    try (CSVParser parser = CSVParser.parse(new File("dev/twitter-hate-speech-processed.csv"),
                Charset.forName("Cp1252"), CSVFormat.DEFAULT);
         PrintWriter writer = new PrintWriter("training/bad/model_comments_bad_tweets.txt", "UTF-8")) {
        boolean headerRow = true;
        for (CSVRecord record : parser) {
            if (headerRow) {
                headerRow = false;
                continue;
            }
            String classification = record.get(0);
            if (!classification.equalsIgnoreCase("The tweet is not offensive")) {
                writer.println(record.get(2));
            }
        }
    }
}
/**
 * Rewrites the CSV dump so every record gains {@code toAppend} as a trailing
 * field, writing to a temp file and then atomically replacing the original.
 *
 * @param targetDumpFile the dump file to rewrite in place
 * @param toAppend the value appended to the end of every record
 */
public static void appendToEndOfLine(Path targetDumpFile, String toAppend) throws Exception {
    Path tmp = Files.createTempFile(null, null);
    try (CSVParser csvParser = new CSVParser(Files.newBufferedReader(targetDumpFile), CSV_DUMP_FORMAT);
         CSVPrinter csvPrinter = new CSVPrinter(Files.newBufferedWriter(tmp), CSV_DUMP_FORMAT)) {
        csvParser.forEach(record -> {
            List<String> extended = new ArrayList<>();
            record.forEach(extended::add);
            extended.add(toAppend);
            try {
                csvPrinter.printRecord(extended);
            } catch (IOException e) {
                throw new RuntimeException("Error appending to EOL", e);
            }
        });
    }
    Files.move(tmp, targetDumpFile, StandardCopyOption.REPLACE_EXISTING);
}
/**
 * Decodes a single CSV line into a BeamRecord matching the given SQL row type,
 * auto-casting each raw field to its declared column type.
 *
 * @param csvFormat the CSV format used to split the line
 * @param line one CSV-encoded row
 * @param beamRecordSqlType the target row type
 * @return the decoded record
 * @throws IllegalArgumentException if the field count differs or parsing fails
 */
public static BeamRecord csvLine2BeamRecord(
        CSVFormat csvFormat,
        String line,
        BeamRecordSqlType beamRecordSqlType) {
    final int fieldCount = beamRecordSqlType.getFieldCount();
    List<Object> fieldsValue = new ArrayList<>(fieldCount);
    try (StringReader reader = new StringReader(line)) {
        CSVParser parser = csvFormat.parse(reader);
        CSVRecord rawRecord = parser.getRecords().get(0);
        if (rawRecord.size() != fieldCount) {
            throw new IllegalArgumentException(String.format(
                "Expect %d fields, but actually %d", fieldCount, rawRecord.size()));
        }
        for (int idx = 0; idx < fieldCount; idx++) {
            String raw = rawRecord.get(idx);
            fieldsValue.add(autoCastField(beamRecordSqlType.getFieldTypeByIndex(idx), raw));
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("decodeRecord failed!", e);
    }
    return new BeamRecord(beamRecordSqlType, fieldsValue);
}
/**
 * String Parsing: splits a CSV-formatted line into exactly {@code len} fields,
 * trimming each value. If the record holds more values than {@code len}, falls
 * back to a plain comma split limited to {@code len} fields.
 *
 * @param val the raw line to split
 * @param len the expected number of fields
 * @return trimmed field values (trailing entries stay null when the line has fewer values)
 * @throws IOException if CSV parsing fails
 */
public static String[] splitStr(String val, Integer len) throws IOException {
    String[] input;
    // try-with-resources: the original skipped parser.close() on the exception path
    try (CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT)) {
        CSVRecord record = parser.getRecords().get(0);
        input = new String[len];
        int i = 0;
        for (String value : record) {
            // NOTE(review): intentionally relies on ArrayIndexOutOfBoundsException
            // (caught below) when the record holds more than len values — preserved
            // as the existing fallback trigger
            input[i] = value.trim();
            i++;
        }
    } catch (ArrayIndexOutOfBoundsException e) {
        // fallback: naive comma split limited to len fields
        input = val.split(",", len);
        for (int i = 0; i < input.length; i++) {
            input[i] = input[i].trim();
        }
    }
    return input;
}
@SuppressWarnings("resource") // @Test public void testReaderFromURL() throws UIMAException, IOException { CSVParser reader = new CSVParser(new FileReader(new File(csvFilename)), CSVFormat.TDF.withHeader((String) null)); List<CSVRecord> records = reader.getRecords(); description = CollectionReaderFactory.createReaderDescription(TextgridTEIUrlReader.class, TextgridTEIUrlReader.PARAM_INPUT, csvFilename, TextgridTEIUrlReader.PARAM_LANGUAGE, "de"); JCasIterator iter = SimplePipeline .iteratePipeline(description, AnalysisEngineFactory.createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, "target/doc/", XmiWriter.PARAM_USE_DOCUMENT_ID, true)) .iterator(); JCas jcas; CSVRecord gold; int recordIndex = 0; while (iter.hasNext()) { jcas = iter.next(); gold = records.get(recordIndex++); checkSanity(jcas); checkGold(jcas, gold); } }
/**
 * Adds labels read in a CSV file to the Jena model.
 *
 * @param filePath The path of the CSV file.
 * @param version The version of the CPC classification.
 * @param language The tag representing the language of the labels ("fr", "es", etc.).
 */
private void addLabels(String filePath, String version, String language) {
    if (filePath == null) return;
    logger.debug("Preparing to create additional labels for version " + version + ", language is " + language);
    // try-with-resources: the original leaked reader/parser if a record failed mid-loop
    try (Reader reader = new InputStreamReader(new FileInputStream(filePath), "Cp1252");
         CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader())) {
        for (CSVRecord record : parser) {
            // column 0 = item code, column 1 = label text
            String itemCode = record.get(0);
            Resource itemResource = cpcModel.createResource(Names.getItemURI(itemCode, "CPC", version));
            itemResource.addProperty(SKOS.prefLabel, cpcModel.createLiteral(record.get(1), language));
        }
    } catch (Exception e) {
        logger.error("Error adding labels from " + filePath, e);
    }
}