Java 类org.apache.hadoop.io.SequenceFile.Reader.Option 实例源码

项目:GeoCrawler    文件:TestInjector.java   
/**
 * Reads every key from the first partition of the current crawldb.
 *
 * <p>The file read is {@code <crawldbPath>/<CrawlDb.CURRENT_NAME>/part-00000/data}.
 * The reader is opened in a try-with-resources block so it is closed even
 * when reading fails part-way through.
 *
 * @return the string form of each key, in file order
 * @throws IOException if the sequence file cannot be opened or read
 */
private List<String> readCrawldb() throws IOException {
  Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME
      + "/part-00000/data");
  System.out.println("reading:" + dbfile);
  Option rFile = SequenceFile.Reader.file(dbfile);
  ArrayList<String> read = new ArrayList<String>();

  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, rFile)) {
    // Only key.toString() (a fresh String) is retained, so the Writable
    // instances can safely be reused across records.
    Text key = new Text();
    CrawlDatum value = new CrawlDatum();
    while (reader.next(key, value)) {
      read.add(key.toString());
    }
  }

  return read;
}
项目:GeoCrawler    文件:TestInjector.java   
/**
 * Reads every record from the first partition of the current crawldb.
 *
 * <p>The file read is {@code <crawldbPath>/<CrawlDb.CURRENT_NAME>/part-00000/data}.
 * The reader is opened in a try-with-resources block so it is closed even
 * when reading fails part-way through.
 *
 * @return a map from the string form of each key to its {@link CrawlDatum};
 *         a later record with the same key replaces an earlier one
 * @throws IOException if the sequence file cannot be opened or read
 */
private HashMap<String, CrawlDatum> readCrawldbRecords() throws IOException {
  Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME
      + "/part-00000/data");
  System.out.println("reading:" + dbfile);
  Option rFile = SequenceFile.Reader.file(dbfile);
  HashMap<String, CrawlDatum> read = new HashMap<String, CrawlDatum>();

  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, rFile)) {
    while (true) {
      Text key = new Text();
      // A fresh value per record: the instance is stored in the map, so it
      // must not be overwritten by the next call to reader.next().
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break;
      }
      read.put(key.toString(), value);
    }
  }

  return read;
}
项目:GeoCrawler    文件:TestCrawlDbFilter.java   
/**
 * Read contents of fetchlist.
 *
 * <p>The reader is opened in a try-with-resources block so it is closed even
 * when reading a record throws.
 *
 * @param fetchlist
 *          path to Generated fetchlist
 * @return Generated {@link URLCrawlDatum} objects, in file order
 * @throws IOException
 *           if the sequence file cannot be opened or read
 */
private ArrayList<URLCrawlDatum> readContents(Path fetchlist)
    throws IOException {
  // verify results
  Option fFile = SequenceFile.Reader.file(fetchlist);
  ArrayList<URLCrawlDatum> l = new ArrayList<URLCrawlDatum>();

  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, fFile)) {
    while (true) {
      // Fresh key/value per record: both instances are retained inside the
      // URLCrawlDatum added to the result list.
      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break;
      }
      l.add(new URLCrawlDatum(key, value));
    }
  }

  return l;
}
项目:GeoCrawler    文件:TestGenerator.java   
/**
 * Read contents of fetchlist.
 *
 * <p>The reader is opened in a try-with-resources block so it is closed even
 * when reading a record throws.
 *
 * @param fetchlist
 *          path to Generated fetchlist
 * @return Generated {@link URLCrawlDatum} objects, in file order
 * @throws IOException
 *           if the sequence file cannot be opened or read
 */
private ArrayList<URLCrawlDatum> readContents(Path fetchlist)
    throws IOException {
  // verify results
  Option rFile = SequenceFile.Reader.file(fetchlist);
  ArrayList<URLCrawlDatum> l = new ArrayList<URLCrawlDatum>();

  try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, rFile)) {
    while (true) {
      // Fresh key/value per record: both instances are retained inside the
      // URLCrawlDatum added to the result list.
      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break;
      }
      l.add(new URLCrawlDatum(key, value));
    }
  }

  return l;
}
项目:kylin    文件:CubeStatsReader.java   
/**
 * Populates this result from the statistics sequence file at {@code path}.
 *
 * <p>Each record is dispatched on its {@code long} key:
 * <ul>
 *   <li>key &gt; 0: the value is read as HLL registers and stored in
 *       {@code counterMap} under that key;</li>
 *   <li>key == 0: the value is decoded as an int into {@code percentage};</li>
 *   <li>key == -1: the value is decoded as a double into
 *       {@code mapperOverlapRatio};</li>
 *   <li>key == -2: the value is decoded as an int into {@code mapperNumber};</li>
 *   <li>any other key is ignored.</li>
 * </ul>
 *
 * @param path      location of the statistics sequence file
 * @param precision precision passed to each {@link HLLCounter} created
 * @throws IOException if the file cannot be opened or read
 */
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader statsReader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        // Key/value holders are instantiated from the classes recorded in the file header.
        LongWritable recordKey = (LongWritable) ReflectionUtils.newInstance(statsReader.getKeyClass(), hadoopConf);
        BytesWritable recordValue = (BytesWritable) ReflectionUtils.newInstance(statsReader.getValueClass(), hadoopConf);

        while (statsReader.next(recordKey, recordValue)) {
            final long id = recordKey.get();

            if (id > 0) {
                // Positive keys carry serialized HLL registers.
                HLLCounter counter = new HLLCounter(precision);
                ByteArray registers = new ByteArray(recordValue.getBytes());
                counter.readRegisters(registers.asBuffer());
                counterMap.put(id, counter);
                continue;
            }

            // Non-positive keys are reserved slots for scalar metadata.
            if (id == 0L) {
                percentage = Bytes.toInt(recordValue.getBytes());
            } else if (id == -1) {
                mapperOverlapRatio = Bytes.toDouble(recordValue.getBytes());
            } else if (id == -2) {
                mapperNumber = Bytes.toInt(recordValue.getBytes());
            }
        }
    }
}
项目:warcutils    文件:WarcSequenceFileRecordReader.java   
/**
 * Opens a sequence-file reader on the split's file and records the
 * start/end positions for this split.
 *
 * <p>If the split starts beyond the reader's initial position, the reader
 * is advanced with {@code sync(splitStart)}. {@code start} is then taken
 * from the reader's resulting position, and {@code done} is set when that
 * position is already at or past {@code end}.
 *
 * @param inputSplit the split to read; must be a {@link FileSplit}
 * @param context    task context supplying the Hadoop configuration
 * @throws IOException          if the file cannot be opened or positioned
 * @throws InterruptedException if querying the split length is interrupted
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) inputSplit;
    final Configuration conf = context.getConfiguration();

    in = new SequenceFile.Reader(conf, SequenceFile.Reader.file(fileSplit.getPath()));

    final long splitStart = fileSplit.getStart();
    this.end = splitStart + inputSplit.getLength();

    final boolean startsPastReader = splitStart > in.getPosition();
    if (startsPastReader) {
        in.sync(splitStart);
    }

    start = in.getPosition();
    done = start >= end;
}
项目:cdk    文件:ReadSequenceFileBuilder.java   
/**
 * Reads the given stream as a sequence file and emits one output record per
 * key/value pair to the child command.
 *
 * <p>Each output record is a copy of {@code inputRecord} with its
 * attachments removed, plus the key, the value, the output media type and,
 * when {@code includeMetaData} is set and metadata is available, the
 * sequence-file metadata.
 *
 * @param inputRecord record used as the template for every output record
 * @param in          stream containing the sequence-file bytes
 * @return {@code false} as soon as the child command rejects a record,
 *         {@code true} once all records have been passed on
 * @throws IOException if the sequence file cannot be opened or read
 */
@Override
protected boolean doProcess(Record inputRecord, InputStream in) throws IOException {
  FSDataInputStream seekableIn = new FSDataInputStream(new ForwardOnlySeekable(in));
  Option streamOption = SequenceFile.Reader.stream(seekableIn);
  SequenceFile.Reader seqReader = null;
  try {
    seqReader = new SequenceFile.Reader(conf, streamOption);
    final SequenceFile.Metadata metadata = includeMetaData ? seqReader.getMetadata() : null;
    final Class<?> keyType = seqReader.getKeyClass();
    final Class<?> valueType = seqReader.getValueClass();

    Record baseRecord = inputRecord.copy();
    removeAttachments(baseRecord);

    for (;;) {
      Writable key = (Writable) ReflectionUtils.newInstance(keyType, conf);
      Writable val = (Writable) ReflectionUtils.newInstance(valueType, conf);

      boolean hasNext;
      try {
        hasNext = seqReader.next(key, val);
      } catch (EOFException ex) {
        // SequenceFile.Reader will throw an EOFException after reading
        // all the data, if it doesn't know the length.  Since we are
        // passing in an InputStream, we hit this case;
        LOG.trace("Received expected EOFException", ex);
        hasNext = false;
      }
      if (!hasNext) {
        break;
      }

      incrementNumRecords();
      Record emitted = baseRecord.copy();
      emitted.put(keyField, key);
      emitted.put(valueField, val);
      emitted.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
      if (includeMetaData && metadata != null) {
        emitted.put(SEQUENCE_FILE_META_DATA, metadata);
      }

      // pass record to next command in chain:
      boolean accepted = getChild().process(emitted);
      if (!accepted) {
        return false;
      }
    }
  } finally {
    Closeables.closeQuietly(seqReader);
  }
  return true;
}