@Test
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data", ".") + "/testseqser.seq");
  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class, String.class);
  writer.append(1L, "one");
  writer.append(2L, "two");
  writer.close();

  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
}
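This round trip only works when Java serialization is registered with Hadoop's serialization factory. As a minimal sketch, the same write using the non-deprecated, Option-based factory, with the io.serializations setting shown explicitly (the file path here is a hypothetical example):

// Sketch: Option-based equivalent of the deprecated createWriter(fs, conf, ...) overload.
// Registering JavaSerialization is what allows plain Long/String keys and values.
Configuration conf = new Configuration();
conf.set("io.serializations",
    "org.apache.hadoop.io.serializer.JavaSerialization,"
    + "org.apache.hadoop.io.serializer.WritableSerialization");
Path file = new Path("/tmp/testseqser.seq"); // hypothetical path
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
    SequenceFile.Writer.file(file),
    SequenceFile.Writer.keyClass(Long.class),
    SequenceFile.Writer.valueClass(String.class));
writer.append(1L, "one");
writer.close();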
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
  try (Writer writer = newWriter(pathToListFile)) {
    Path sourceRootPath = getRootPath(getConf());

    for (Path sourcePath : options.getSourcePaths()) {
      FileSystem fileSystem = sourcePath.getFileSystem(getConf());
      FileStatus directory = fileSystem.getFileStatus(sourcePath);
      Map<String, CopyListingFileStatus> children =
          new FileStatusTreeTraverser(fileSystem)
              .preOrderTraversal(directory)
              .transform(new CopyListingFileStatusFunction(fileSystem, options))
              .uniqueIndex(new RelativePathFunction(sourceRootPath));

      for (Entry<String, CopyListingFileStatus> entry : children.entrySet()) {
        LOG.debug("Adding '{}' with relative path '{}'",
            entry.getValue().getPath(), entry.getKey());
        writer.append(new Text(entry.getKey()), entry.getValue());
        writer.sync();
      }
    }
  }
}
@Test
public void testSimpleConsumerWithEmptySequenceFile() throws Exception {
  if (!canTest()) {
    return;
  }
  final Path file = new Path(new File("target/test/test-camel-sequence-file").getAbsolutePath());
  Configuration conf = new Configuration();
  SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BooleanWritable.class);
  writer.sync();
  writer.close();

  MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
  resultEndpoint.expectedMessageCount(0);

  context.addRoutes(new RouteBuilder() {
    public void configure() {
      from("hdfs2:localhost/" + file.toUri()
          + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&chunkSize=4096&initialDelay=0")
          .to("mock:result");
    }
  });
  context.start();

  resultEndpoint.assertIsSatisfied();
}
public MahoutOutput(String output, FileSystem fs, Configuration conf) throws IOException {
  // Clear the output dir: delete any existing directory recursively, then recreate it.
  Path basedir = new Path(output);
  if (fs.exists(basedir)) {
    fs.delete(basedir, true);
  }
  fs.mkdirs(basedir);

  String dictOutput = output + "/dictionary.file";
  dictWriter = createWriter(conf,
      Writer.file(new Path(dictOutput)),
      Writer.keyClass(Text.class),
      Writer.valueClass(IntWritable.class));

  String vectorsPath = output + "/tfidf-vectors";
  tfidfWriter = new SequenceFile.Writer(fs, conf, new Path(vectorsPath),
      Text.class, VectorWritable.class);
}
public SequenceEventWriterInstance(OutputStream stream,
    Class<?> keyClass,
    Class<?> valueClass,
    CompressionType compressionType) {
  if (!(stream instanceof FSDataOutputStream)) {
    throw new RuntimeException("OutputStream must be a FSDataOutputStream");
  }
  try {
    writer = SequenceFile.createWriter(hdfs.getHadoopConfig(),
        Writer.stream((FSDataOutputStream) stream),
        Writer.keyClass(keyClass),
        Writer.valueClass(valueClass),
        Writer.compression(compressionType));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Create control files before a test run.
 * Number of files created is equal to the number of maps specified.
 *
 * @throws IOException on error
 */
private void createControlFiles() throws IOException {
  LOG.info("Creating " + numberOfMaps + " control files");

  for (int i = 0; i < numberOfMaps; i++) {
    String strFileName = "NNBench_Controlfile_" + i;
    Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME), strFileName);

    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(getConf(), Writer.file(filePath),
          Writer.keyClass(Text.class), Writer.valueClass(LongWritable.class),
          Writer.compression(CompressionType.NONE));
      writer.append(new Text(strFileName), new LongWritable(i));
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}
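For completeness, a minimal sketch of reading one of these control files back; the path construction mirrors the writer above, and the choice of which control file to open is an assumption:

// Sketch: read a Text/LongWritable control file written by createControlFiles().
Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME), "NNBench_Controlfile_0");
SequenceFile.Reader reader = new SequenceFile.Reader(getConf(),
    SequenceFile.Reader.file(filePath));
try {
  Text key = new Text();
  LongWritable value = new LongWritable();
  // next() returns false once the end of the file is reached.
  while (reader.next(key, value)) {
    System.out.println(key + " -> " + value.get());
  }
} finally {
  reader.close();
}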
@Test
public void testJavaSerialization() throws Exception {
  Path file = new Path(GenericTestUtils.getTempPath("testseqser.seq"));
  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class, String.class);
  writer.append(1L, "one");
  writer.append(2L, "two");
  writer.close();

  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
}
@SuppressWarnings("unchecked") private Writer createWriter(Map<String, String> metadata) throws IOException { final Metadata md = new Metadata(); for (final Entry<String, String> e : metadata.entrySet()) { md.set(new Text(e.getKey()), new Text(e.getValue())); } final Class<K> keyClass = (Class<K>) ((ParameterizedType) getClass().getGenericSuperclass()) .getActualTypeArguments()[0]; final Class<V> valueClass = (Class<V>) ((ParameterizedType) getClass().getGenericSuperclass()) .getActualTypeArguments()[1]; return SequenceFile.createWriter(fileSystem, config, sequenceFilePath, keyClass, valueClass, compressionType, new DefaultCodec(), null, md); }
public int run(String[] args) throws Exception {
  // Configuration processed by ToolRunner
  Properties p = new Properties();
  p.load(new FileInputStream(new File(args[0])));
  configure(p);

  String inputDirectoryName = p.getProperty("input.directory");
  File f = new File(inputDirectoryName);
  if (!f.exists() || !f.isDirectory()) {
    logger.error("Invalid input directory: " + inputDirectoryName);
    return -1;
  }

  String outputFileName = p.getProperty("output.file");
  Path outputPath = new Path(outputFileName);
  SequenceFile.Writer writer = SequenceFile.createWriter(getConf(),
      Writer.keyClass(Text.class),
      Writer.valueClass(Text.class),
      Writer.file(outputPath));
  for (File document : f.listFiles()) {
    String contents = FileUtils.readFileToString(document);
    writer.append(new Text(document.getName()), new Text(contents));
  }
  writer.close();
  return 0;
}
private void writeRowIds(Writer writer, SegmentReader segmentReader) throws IOException {
  Terms terms = segmentReader.terms(BlurConstants.ROW_ID);
  if (terms == null) {
    return;
  }
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef rowId;
  long s = System.nanoTime();
  while ((rowId = termsEnum.next()) != null) {
    long n = System.nanoTime();
    // Report progress at most once every ten seconds. (The original condition,
    // "n + _10_SECONDS > s", was always true since n >= s, so it reported on
    // every iteration; the elapsed-time check below matches the evident intent.)
    if (n - s > _10_SECONDS) {
      _progressable.progress();
      s = System.nanoTime();
    }
    writer.append(new Text(rowId.utf8ToString()), NullWritable.get());
  }
}
private synchronized void storeGenerations() throws IOException {
  FileSystem fileSystem = _path.getFileSystem(_configuration);
  FileStatus[] listStatus = fileSystem.listStatus(_path);
  SortedSet<FileStatus> existing = new TreeSet<FileStatus>(Arrays.asList(listStatus));
  long currentFile;
  if (!existing.isEmpty()) {
    FileStatus last = existing.last();
    currentFile = Long.parseLong(last.getPath().getName());
  } else {
    currentFile = 0;
  }
  Path path = new Path(_path, buffer(currentFile + 1));
  LOG.info("Creating new snapshot file [{0}]", path);
  FSDataOutputStream outputStream = fileSystem.create(path, false);
  Writer writer = SequenceFile.createWriter(_configuration, outputStream,
      Text.class, LongWritable.class, CompressionType.NONE, null);
  for (Entry<String, Long> e : _namesToGenerations.entrySet()) {
    writer.append(new Text(e.getKey()), new LongWritable(e.getValue()));
  }
  writer.close();
  outputStream.close();
  cleanupOldFiles(fileSystem, existing);
}
/**
 * Recursively traverse the directory and add every regular file to the sequence file.
 *
 * @param seq the sequence file writer to append to
 * @param pFile the file or directory to add
 */
private static void traverseAdd(Writer seq, File pFile) {
  if (pFile.isDirectory()) {
    for (File file : pFile.listFiles()) {
      traverseAdd(seq, file);
    }
  } else {
    try {
      addFile(seq, pFile);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  if (args.length != 2) {
    System.err.println("Usage: ConvertFastaForCloud file.fa outfile.br");
    System.exit(-1);
  }
  String infile = args[0];
  String outfile = args[1];

  System.err.println("Converting " + infile + " into " + outfile);

  JobConf config = new JobConf();
  SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(config),
      config, new Path(outfile), IntWritable.class, BytesWritable.class);
  convertFile(infile, writer);
  writer.close();

  System.err.println("min_seq_len: " + min_seq_len);
  System.err.println("max_seq_len: " + max_seq_len);
  System.err.println("Using DNAString version: " + DNAString.VERSION);
}
private void createInputFiles(String inputPathString, long numFiles,
    long fileSize, String outputPath) {
  try {
    FileSystem fs = FileSystem.get(new URI(inputPathString), this.conf);
    fs.mkdirs(new Path(inputPathString));
    for (int fileNumber = 1; fileNumber <= numFiles; fileNumber++) {
      String inputFileName = join(inputPathString, Integer.valueOf(fileNumber));
      Path inputFilePath = new Path(inputFileName);
      fs.delete(inputFilePath, true);
      SequenceFile.Writer writer = SequenceFile.createWriter(fs, this.conf,
          inputFilePath, LongWritable.class, CreateFileInfo.class,
          SequenceFile.CompressionType.NONE);
      try {
        writer.append(new LongWritable(fileNumber),
            new CreateFileInfo(join(outputPath, Integer.valueOf(fileNumber)), fileSize));
      } finally {
        writer.close();
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
@Test(timeout = 30000)
public void testAppendRecordCompression() throws Exception {
  GenericTestUtils.assumeInNativeProfile();

  Path file = new Path(ROOT_PATH, "testseqappendblockcompr.seq");
  fs.delete(file, true);

  Option compressOption = Writer.compression(CompressionType.RECORD, new GzipCodec());
  Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      compressOption);
  writer.append(1L, "one");
  writer.append(2L, "two");
  writer.close();

  verify2Values(file);

  writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(Long.class),
      SequenceFile.Writer.valueClass(String.class),
      SequenceFile.Writer.appendIfExists(true),
      compressOption);
  writer.append(3L, "three");
  writer.append(4L, "four");
  writer.close();

  verifyAll4Values(file);

  fs.deleteOnExit(file);
}
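verify2Values and verifyAll4Values are not shown in this snippet. A minimal sketch of what the first check plausibly looks like, following the Reader pattern from the serialization tests above (the helper body is an assumption, not the original source):

// Sketch: assumed shape of verify2Values(Path), mirroring the read pattern above.
private void verify2Values(Path file) throws IOException {
  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null)); // end of file
  reader.close();
}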
private void writeOutput(RemoteIterator<? extends FileStatus> input) throws IOException {
  Path outPath = new Path(output);
  if (distribFs.exists(outPath)) {
    throw new IllegalArgumentException("Output file already exists, not overwriting it: " + output);
  }
  Writer writer = SequenceFile.createWriter(distribFs.getConf(),
      Writer.file(outPath),
      Writer.keyClass(Text.class),
      Writer.valueClass(BytesWritable.class),
      Writer.compression(SequenceFile.CompressionType.RECORD));
  Text key = new Text();
  BytesWritable value = new BytesWritable();
  long skipped = 0;
  long copied = 0;
  while (input.hasNext()) {
    FileStatus next = input.next();
    if (filter(next)) {
      key.set(next.getPath().toString());
      FSDataInputStream stream = localFs.open(next.getPath());
      // CAUTION: this reads the entire file into memory and can overflow on large files.
      byte[] bytes = IOUtils.toByteArray(stream);
      stream.close(); // the original leaked this stream
      value.set(bytes, 0, bytes.length);
      writer.append(key, value);
      copied++;
    } else {
      skipped++;
    }
  }
  writer.close();
  System.out.println("Files copied :: " + copied);
  System.out.println("Files skipped :: " + skipped);
}
private Writer newWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return createWriter(getConf(), file(pathToListFile),
      keyClass(Text.class), valueClass(CopyListingFileStatus.class),
      compression(NONE));
}
/** Test hsync via SequenceFiles */
@Test
public void testSequenceFileSync() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();

  final FileSystem fs = cluster.getFileSystem();
  final Path p = new Path("/testSequenceFileSync/foo");
  final int len = 1 << 16;
  FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
      4096, (short) 1, len, null);
  Writer w = SequenceFile.createWriter(new Configuration(),
      Writer.stream(out),
      Writer.keyClass(RandomDatum.class),
      Writer.valueClass(RandomDatum.class),
      Writer.compression(CompressionType.NONE, new DefaultCodec()));

  w.hflush();
  checkSyncMetric(cluster, 0);
  w.hsync();
  checkSyncMetric(cluster, 1);

  int seed = new Random().nextInt();
  RandomDatum.Generator generator = new RandomDatum.Generator(seed);
  generator.next();
  w.append(generator.getKey(), generator.getValue());
  w.hsync();
  checkSyncMetric(cluster, 2);
  w.close();
  checkSyncMetric(cluster, 2);
  out.close();
  checkSyncMetric(cluster, 3);

  cluster.shutdown();
}
/**
 * Initializes the hadoop sorter. Does some local file system setup, and is somewhat expensive
 * (~20 ms on local machine). Only executed when necessary.
 */
private void initHadoopSorter() throws IOException {
  if (!initialized) {
    tempDir = new Path(options.getTempLocation(), "tmp" + UUID.randomUUID().toString());
    paths = new Path[] {new Path(tempDir, "test.seq")};

    JobConf conf = new JobConf();
    // Sets directory for intermediate files created during merge of merge sort
    conf.set("io.seqfile.local.dir", tempDir.toUri().getPath());

    writer = SequenceFile.createWriter(conf,
        Writer.valueClass(BytesWritable.class),
        Writer.keyClass(BytesWritable.class),
        Writer.file(paths[0]),
        Writer.compression(CompressionType.NONE));

    FileSystem fs = FileSystem.getLocal(conf);
    // Directory has to exist for Hadoop to recognize it as deletable on exit
    fs.mkdirs(tempDir);
    fs.deleteOnExit(tempDir);

    sorter = new SequenceFile.Sorter(fs, new BytesWritable.Comparator(),
        BytesWritable.class, BytesWritable.class, conf);
    sorter.setMemory(options.getMemoryMB() * 1024 * 1024);

    initialized = true;
  }
}
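Once initialized, a sorter like this is typically driven by closing the writer and sorting the staged file. A minimal sketch, reusing writer, paths, sorter, tempDir, and conf from the snippet above; the output path and the read-back loop are assumptions, while SequenceFile.Sorter.sort(Path[], Path, boolean) is the standard entry point:

// Sketch: sort the staged sequence file written above.
writer.close(); // flush buffered records before sorting
Path sorted = new Path(tempDir, "sorted.seq"); // hypothetical output path
sorter.sort(paths, sorted, /* deleteInput = */ true);

// Read the sorted records back in key order.
SequenceFile.Reader reader = new SequenceFile.Reader(conf,
    SequenceFile.Reader.file(sorted));
BytesWritable key = new BytesWritable();
BytesWritable value = new BytesWritable();
while (reader.next(key, value)) {
  // process(key, value);
}
reader.close();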
public int run(String[] args) throws IOException {
  if (args.length != 2) {
    System.err.println("USAGE: hadoop fr.liglab.mining.AsciiToSequenceFile INPUT OUTPUT");
    // Bail out here; the original fell through and would have thrown
    // ArrayIndexOutOfBoundsException on args[0] below.
    return 1;
  }

  FileSystem fs = FileSystem.get(getConf());
  Writer writer = new Writer(fs, getConf(), new Path(args[1]),
      NullWritable.class, TransactionWritable.class);

  NullWritable keyW = NullWritable.get();
  TransactionWritable valueW = new TransactionWritable();

  FileReader reader = new FileReader(args[0]);
  ItemsetsFactory factory = new ItemsetsFactory();

  while (reader.hasNext()) {
    TransactionReader source = reader.next();
    while (source.hasNext()) {
      factory.add(source.next());
    }
    valueW.set(factory.get());
    writer.append(keyW, valueW);
  }

  writer.close();
  reader.close();
  return 0;
}
@Test
public void testReadBoolean() throws Exception {
  if (!canTest()) {
    return;
  }
  final Path file = new Path(new File("target/test/test-camel-boolean").getAbsolutePath());
  Configuration conf = new Configuration();
  SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BooleanWritable.class);
  NullWritable keyWritable = NullWritable.get();
  BooleanWritable valueWritable = new BooleanWritable();
  valueWritable.set(true);
  writer.append(keyWritable, valueWritable);
  writer.sync();
  writer.close();

  context.addRoutes(new RouteBuilder() {
    public void configure() {
      from("hdfs2:localhost/" + file.toUri()
          + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
          .to("mock:result");
    }
  });
  context.start();

  MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
  resultEndpoint.expectedMessageCount(1);
  resultEndpoint.assertIsSatisfied();
}
@Test
public void testReadByte() throws Exception {
  if (!canTest()) {
    return;
  }
  final Path file = new Path(new File("target/test/test-camel-byte").getAbsolutePath());
  Configuration conf = new Configuration();
  SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, ByteWritable.class);
  NullWritable keyWritable = NullWritable.get();
  ByteWritable valueWritable = new ByteWritable();
  byte value = 3;
  valueWritable.set(value);
  writer.append(keyWritable, valueWritable);
  writer.sync();
  writer.close();

  MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
  resultEndpoint.expectedMessageCount(1);
  resultEndpoint.message(0).body(byte.class).isEqualTo(3);

  context.addRoutes(new RouteBuilder() {
    public void configure() {
      from("hdfs2:localhost/" + file.toUri()
          + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
          .to("mock:result");
    }
  });
  context.start();

  resultEndpoint.assertIsSatisfied();
}