Example source code for the Java class org.apache.hadoop.io.RawComparator
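
RawComparator extends java.util.Comparator with an additional compare method that operates directly on serialized byte ranges, so MapReduce and HBase can sort and group keys without deserializing them. The snippets below show how the interface flows through reduce contexts, merges, combiners, TFile, and index lookups. As a minimal sketch (not taken from any of the projects below; the class name is illustrative), a custom RawComparator for IntWritable keys is usually written by extending WritableComparator, which already implements the interface:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparator;

// Illustrative sketch: sorts IntWritable keys in descending order by comparing
// the serialized bytes directly (IntWritable serializes as a 4-byte big-endian int).
public class DescendingIntComparator extends WritableComparator {

  public DescendingIntComparator() {
    super(IntWritable.class, true); // register the key class; create instances for object compares
  }

  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    int left = readInt(b1, s1);   // WritableComparator.readInt reads a big-endian int
    int right = readInt(b2, s2);
    return -Integer.compare(left, right);
  }
}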

Project: hadoop    File: ReduceContextImpl.java
public ReduceContextImpl(Configuration conf, TaskAttemptID taskid,
                         RawKeyValueIterator input, 
                         Counter inputKeyCounter,
                         Counter inputValueCounter,
                         RecordWriter<KEYOUT,VALUEOUT> output,
                         OutputCommitter committer,
                         StatusReporter reporter,
                         RawComparator<KEYIN> comparator,
                         Class<KEYIN> keyClass,
                         Class<VALUEIN> valueClass
                        ) throws InterruptedException, IOException{
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  this.serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  this.conf = conf;
  this.taskid = taskid;
}
Project: hadoop    File: Task.java
public ValuesIterator (RawKeyValueIterator in, 
                       RawComparator<KEY> comparator, 
                       Class<KEY> keyClass,
                       Class<VALUE> valClass, Configuration conf, 
                       Progressable reporter)
  throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.reporter = reporter;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);
  this.valDeserializer = serializationFactory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
Project: hadoop    File: Task.java
@SuppressWarnings("unchecked")
protected OldCombinerRunner(Class<? extends Reducer<K,V,K,V>> cls,
                            JobConf conf,
                            Counters.Counter inputCounter,
                            TaskReporter reporter) {
  super(inputCounter, conf, reporter);
  combinerClass = cls;
  keyClass = (Class<K>) job.getMapOutputKeyClass();
  valueClass = (Class<V>) job.getMapOutputValueClass();
  comparator = (RawComparator<K>)
      job.getCombinerKeyGroupingComparator();
}
Project: hadoop    File: Task.java
@SuppressWarnings("unchecked")
NewCombinerRunner(Class reducerClass,
                  JobConf job,
                  org.apache.hadoop.mapreduce.TaskAttemptID taskId,
                  org.apache.hadoop.mapreduce.TaskAttemptContext context,
                  Counters.Counter inputCounter,
                  TaskReporter reporter,
                  org.apache.hadoop.mapreduce.OutputCommitter committer) {
  super(inputCounter, job, reporter);
  this.reducerClass = reducerClass;
  this.taskId = taskId;
  keyClass = (Class<K>) context.getMapOutputKeyClass();
  valueClass = (Class<V>) context.getMapOutputValueClass();
  comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
  this.committer = committer;
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass, 
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs, 
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
throws IOException {
  return 
    new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator, 
                         reporter, null,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                         mergeFactor, tmpDir,
                                         readsCounter, writesCounter, 
                                         mergePhase);
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass, 
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs, 
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator,
                          Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Counters.Counter mergedMapOutputsCounter,
                          Progress mergePhase)
throws IOException {
  return 
    new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator, 
                         reporter, mergedMapOutputsCounter,
                         TaskType.REDUCE).merge(
                                         keyClass, valueClass,
                                         mergeFactor, tmpDir,
                                         readsCounter, writesCounter,
                                         mergePhase);
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs, 
                          Class<K> keyClass, Class<V> valueClass, 
                          List<Segment<K, V>> segments, 
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return merge(conf, fs, keyClass, valueClass, segments, mergeFactor, tmpDir,
               comparator, reporter, false, readsCounter, writesCounter,
               mergePhase);
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                             mergeFactor, tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase,
                          TaskType taskType)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments, codec,
                         taskType).merge(keyClass, valueClass,
                                             mergeFactor, tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
  RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          List<Segment<K, V>> segments,
                          int mergeFactor, int inMemSegments, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                             mergeFactor, inMemSegments,
                                             tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: hadoop    File: Merger.java
public static <K extends Object, V extends Object>
  RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, int inMemSegments, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments, codec,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                             mergeFactor, inMemSegments,
                                             tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: hadoop    File: ReduceTask.java
public SkippingReduceValuesIterator(RawKeyValueIterator in,
    RawComparator<KEY> comparator, Class<KEY> keyClass,
    Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
    TaskUmbilicalProtocol umbilical) throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.umbilical = umbilical;
  this.skipGroupCounter = 
    reporter.getCounter(TaskCounter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter = 
    reporter.getCounter(TaskCounter.REDUCE_SKIPPED_RECORDS);
  this.toWriteSkipRecs = toWriteSkipRecs() &&  
    SkipBadRecords.getSkipOutputPath(conf)!=null;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;
  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Project: ditb    File: IndexFile.java
static BlockIndex readIndex(final RawComparator<byte []> c,
    DataInputStream in, final int indexSize)
throws IOException {
  BlockIndex bi = new BlockIndex(c);
  bi.blockOffsets = new long[indexSize];
  bi.blockKeys = new byte[indexSize][];
  bi.blockDataSizes = new int[indexSize];
  // If index size is zero, no index was written.
  if (indexSize > 0) {
    byte [] magic = new byte[INDEXBLOCKMAGIC.length];
    in.readFully(magic);
    if (!Arrays.equals(magic, INDEXBLOCKMAGIC)) {
      throw new IOException("Index block magic is wrong: " +
        Arrays.toString(magic));
    }
    for (int i = 0; i < indexSize; ++i ) {
      long offset   = in.readLong();
      int dataSize  = in.readInt();
      byte [] key = Bytes.readByteArray(in);
      bi.add(key, offset, dataSize);
    }
  }
  return bi;
}
Project: ditb    File: Bytes.java
/**
 * Binary search for keys in indexes.
 *
 * @param arr array of byte arrays to search in
 * @param key the key you want to find
 * @param offset the offset into the key at which the comparison starts
 * @param length the number of key bytes to compare
 * @param comparator the comparator used to compare keys
 * @return zero-based index of the key, if the key is present in the array.
 *         Otherwise, a value -(i + 1) such that the key is between arr[i -
 *         1] and arr[i] non-inclusively, where i is in [0, N], if we define
 *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
 *         means that this function can return 2N + 1 different values
 *         ranging from -(N + 1) to N - 1.
 */
public static int binarySearch(byte [][]arr, byte []key, int offset,
    int length, RawComparator<?> comparator) {
  int low = 0;
  int high = arr.length - 1;

  while (low <= high) {
    int mid = (low+high) >>> 1;
    // we have to compare in this order, because the comparator order
    // has special logic when the 'left side' is a special key.
    int cmp = comparator.compare(key, offset, length,
        arr[mid], 0, arr[mid].length);
    // key lives above the midpoint
    if (cmp > 0)
      low = mid + 1;
    // key lives below the midpoint
    else if (cmp < 0)
      high = mid - 1;
    // BAM. how often does this really happen?
    else
      return mid;
  }
  return - (low+1);
}
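
A hypothetical usage sketch of the method above, illustrating the negative-return convention described in its javadoc (Bytes.BYTES_RAWCOMPARATOR is HBase's lexicographic RawComparator<byte[]>; the helper name is made up):

// Hypothetical helper: locate the index block whose first key is <= probeKey.
static int findBlock(byte[][] blockKeys, byte[] probeKey) {
  int pos = Bytes.binarySearch(blockKeys, probeKey, 0, probeKey.length,
      Bytes.BYTES_RAWCOMPARATOR);
  if (pos >= 0) {
    return pos;            // exact match
  }
  // -(pos + 1) is the insertion point; the preceding entry covers probeKey
  // (this is -1 when probeKey sorts before every block key).
  return -(pos + 1) - 1;
}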
Project: ditb    File: Bytes.java
/**
 * Binary search for keys in indexes.
 *
 * @param arr array of byte arrays to search in
 * @param key the key you want to find
 * @param comparator the comparator used to compare keys
 * @return zero-based index of the key, if the key is present in the array.
 *         Otherwise, a value -(i + 1) such that the key is between arr[i -
 *         1] and arr[i] non-inclusively, where i is in [0, N], if we define
 *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
 *         means that this function can return 2N + 1 different values
 *         ranging from -(N + 1) to N - 1.
 */
public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
  int low = 0;
  int high = arr.length - 1;
  KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
  while (low <= high) {
    int mid = (low+high) >>> 1;
    // we have to compare in this order, because the comparator order
    // has special logic when the 'left side' is a special key.
    r.setKey(arr[mid], 0, arr[mid].length);
    int cmp = comparator.compare(key, r);
    // key lives above the midpoint
    if (cmp > 0)
      low = mid + 1;
    // key lives below the midpoint
    else if (cmp < 0)
      high = mid - 1;
    // BAM. how often does this really happen?
    else
      return mid;
  }
  return - (low+1);
}
Project: aliyun-oss-hadoop-fs    File: Task.java
@SuppressWarnings("unchecked")
NewCombinerRunner(Class reducerClass,
                  JobConf job,
                  org.apache.hadoop.mapreduce.TaskAttemptID taskId,
                  org.apache.hadoop.mapreduce.TaskAttemptContext context,
                  Counters.Counter inputCounter,
                  TaskReporter reporter,
                  org.apache.hadoop.mapreduce.OutputCommitter committer) {
  super(inputCounter, job, reporter);
  this.reducerClass = reducerClass;
  this.taskId = taskId;
  keyClass = (Class<K>) context.getMapOutputKeyClass();
  valueClass = (Class<V>) context.getMapOutputValueClass();
  comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
  this.committer = committer;
}
Project: aliyun-oss-hadoop-fs    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass, 
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs, 
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator,
                          Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Counters.Counter mergedMapOutputsCounter,
                          Progress mergePhase)
throws IOException {
  return 
    new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator, 
                         reporter, mergedMapOutputsCounter,
                         TaskType.REDUCE).merge(
                                         keyClass, valueClass,
                                         mergeFactor, tmpDir,
                                         readsCounter, writesCounter,
                                         mergePhase);
}
Project: aliyun-oss-hadoop-fs    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                             mergeFactor, tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: aliyun-oss-hadoop-fs    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase,
                          TaskType taskType)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments, codec,
                         taskType).merge(keyClass, valueClass,
                                             mergeFactor, tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: aliyun-oss-hadoop-fs    File: Merger.java
public static <K extends Object, V extends Object>
  RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          List<Segment<K, V>> segments,
                          int mergeFactor, int inMemSegments, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                             mergeFactor, inMemSegments,
                                             tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: big-c    File: MergeManagerImpl.java
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = 
    (RawComparator<K>)job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
                      Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
Project: aliyun-oss-hadoop-fs    File: ReduceTask.java
public SkippingReduceValuesIterator(RawKeyValueIterator in,
    RawComparator<KEY> comparator, Class<KEY> keyClass,
    Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
    TaskUmbilicalProtocol umbilical) throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.umbilical = umbilical;
  this.skipGroupCounter = 
    reporter.getCounter(TaskCounter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter = 
    reporter.getCounter(TaskCounter.REDUCE_SKIPPED_RECORDS);
  this.toWriteSkipRecs = toWriteSkipRecs() &&  
    SkipBadRecords.getSkipOutputPath(conf)!=null;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;
  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Project: aliyun-oss-hadoop-fs    File: MergeManagerImpl.java
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = 
    (RawComparator<K>)job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
                      Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
Project: big-c    File: Task.java
@SuppressWarnings("unchecked")
NewCombinerRunner(Class reducerClass,
                  JobConf job,
                  org.apache.hadoop.mapreduce.TaskAttemptID taskId,
                  org.apache.hadoop.mapreduce.TaskAttemptContext context,
                  Counters.Counter inputCounter,
                  TaskReporter reporter,
                  org.apache.hadoop.mapreduce.OutputCommitter committer) {
  super(inputCounter, job, reporter);
  this.reducerClass = reducerClass;
  this.taskId = taskId;
  keyClass = (Class<K>) context.getMapOutputKeyClass();
  valueClass = (Class<V>) context.getMapOutputValueClass();
  comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
  this.committer = committer;
}
Project: big-c    File: Merger.java
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase,
                          TaskType taskType)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments, codec,
                         taskType).merge(keyClass, valueClass,
                                             mergeFactor, tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: big-c    File: Merger.java
public static <K extends Object, V extends Object>
  RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          List<Segment<K, V>> segments,
                          int mergeFactor, int inMemSegments, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                             mergeFactor, inMemSegments,
                                             tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
Project: big-c    File: ReduceTask.java
public SkippingReduceValuesIterator(RawKeyValueIterator in,
    RawComparator<KEY> comparator, Class<KEY> keyClass,
    Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
    TaskUmbilicalProtocol umbilical) throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.umbilical = umbilical;
  this.skipGroupCounter = 
    reporter.getCounter(TaskCounter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter = 
    reporter.getCounter(TaskCounter.REDUCE_SKIPPED_RECORDS);
  this.toWriteSkipRecs = toWriteSkipRecs() &&  
    SkipBadRecords.getSkipOutputPath(conf)!=null;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;
  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Project: hadoop-oss    File: TFile.java
@SuppressWarnings("unchecked")
static BytesComparator makeComparator(String comparator) {
  if (comparator.length() == 0) {
    // unsorted keys
    return null;
  }
  if (comparator.equals(COMPARATOR_MEMCMP)) {
    // default comparator
    return new BytesComparator(new MemcmpRawComparator());
  } else if (comparator.startsWith(COMPARATOR_JCLASS)) {
    String compClassName =
        comparator.substring(COMPARATOR_JCLASS.length()).trim();
    try {
      Class compClass = Class.forName(compClassName);
      // use its default ctor to create an instance
      return new BytesComparator((RawComparator<Object>) compClass
          .newInstance());
    } catch (Exception e) {
      throw new IllegalArgumentException(
          "Failed to instantiate comparator: " + comparator + "("
              + e.toString() + ")");
    }
  } else {
    throw new IllegalArgumentException("Unsupported comparator: "
        + comparator);
  }
}
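
For context, a hedged sketch of where the comparator string parsed by makeComparator normally comes from: TFile.Writer takes it as a constructor argument, with COMPARATOR_MEMCMP ("memcmp") selecting raw byte order and COMPARATOR_JCLASS ("jclass:") prefixing a user-supplied RawComparator class name (fs, path, conf, and the custom class name below are illustrative assumptions):

FSDataOutputStream out = fs.create(path);  // fs, path, conf assumed to exist
TFile.Writer memcmpSorted = new TFile.Writer(out, 64 * 1024, TFile.COMPRESSION_GZ,
    TFile.COMPARATOR_MEMCMP, conf);
// Or with a user-supplied RawComparator (illustrative class name):
// new TFile.Writer(out, 64 * 1024, TFile.COMPRESSION_GZ,
//     TFile.COMPARATOR_JCLASS + "com.example.MyRawComparator", conf);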
Project: lustre-connector-for-hadoop    File: LustreFsShuffle.java
@SuppressWarnings("unchecked")
public RawKeyValueIterator finish() throws Throwable {
    // merge config params
    Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
    final RawComparator<K> comparator = (RawComparator<K>) jobConf.getOutputKeyComparator();

    // Wait for on-going merges to complete
    merger.close();

    LOG.info("finalMerge called with " + segmentsToBeMerged.size() + " on-disk map-outputs");

    List<Segment<K, V>> segments = new ArrayList<Segment<K, V>>();
    long onDiskBytes = 0;

    for (Segment<K, V> segment : segmentsToBeMerged) {
        long fileLength = segment.getLength();
        onDiskBytes += fileLength;
        LOG.debug("Disk file: " + segment + " Length is " + fileLength);
        segments.add(segment);
    }
    segmentsToBeMerged.clear();

    LOG.info("Merging " + segmentsToBeMerged.size() + " files, " + onDiskBytes + " bytes from disk");
    Collections.sort(segments, new Comparator<Segment<K, V>>() {

        public int compare(Segment<K, V> o1, Segment<K, V> o2) {
            if (o1.getLength() == o2.getLength()) {
                return 0;
            }
            return o1.getLength() < o2.getLength() ? -1 : 1;
        }
    });
    return Merger.merge(jobConf, lustrefs, keyClass, valueClass, segments, segments.size(), mergeTempDir,
                        comparator, reporter, spilledRecordsCounter, null, null);
}
Project: hadoop    File: TestTotalOrderPartitioner.java
public void testTotalOrderCustomComparator() throws Exception {
  TotalOrderPartitioner<Text,NullWritable> partitioner =
    new TotalOrderPartitioner<Text,NullWritable>();
  Configuration conf = new Configuration();
  Text[] revSplitStrings = Arrays.copyOf(splitStrings, splitStrings.length);
  Arrays.sort(revSplitStrings, new ReverseStringComparator());
  Path p = TestTotalOrderPartitioner.<Text>writePartitionFile(
      "totalordercustomcomparator", conf, revSplitStrings);
  conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
  conf.setClass(MRJobConfig.KEY_COMPARATOR,
    ReverseStringComparator.class, RawComparator.class);
  ArrayList<Check<Text>> revCheck = new ArrayList<Check<Text>>();
  revCheck.add(new Check<Text>(new Text("aaaaa"), 9));
  revCheck.add(new Check<Text>(new Text("aaabb"), 9));
  revCheck.add(new Check<Text>(new Text("aabbb"), 9));
  revCheck.add(new Check<Text>(new Text("aaaaa"), 9));
  revCheck.add(new Check<Text>(new Text("babbb"), 8));
  revCheck.add(new Check<Text>(new Text("baabb"), 8));
  revCheck.add(new Check<Text>(new Text("yai"), 1));
  revCheck.add(new Check<Text>(new Text("yak"), 1));
  revCheck.add(new Check<Text>(new Text("z"), 0));
  revCheck.add(new Check<Text>(new Text("ddngo"), 4));
  revCheck.add(new Check<Text>(new Text("hi"), 3));
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    for (Check<Text> chk : revCheck) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
    }
  } finally {
    p.getFileSystem(conf).delete(p, true);
  }
}
Project: hadoop    File: Task.java
public CombineValuesIterator(RawKeyValueIterator in,
    RawComparator<KEY> comparator, Class<KEY> keyClass,
    Class<VALUE> valClass, Configuration conf, Reporter reporter,
    Counters.Counter combineInputCounter) throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.combineInputCounter = combineInputCounter;
}
Project: hadoop    File: Task.java
@SuppressWarnings("unchecked")
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE> 
org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
createReduceContext(org.apache.hadoop.mapreduce.Reducer
                      <INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
                    Configuration job,
                    org.apache.hadoop.mapreduce.TaskAttemptID taskId, 
                    RawKeyValueIterator rIter,
                    org.apache.hadoop.mapreduce.Counter inputKeyCounter,
                    org.apache.hadoop.mapreduce.Counter inputValueCounter,
                    org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output, 
                    org.apache.hadoop.mapreduce.OutputCommitter committer,
                    org.apache.hadoop.mapreduce.StatusReporter reporter,
                    RawComparator<INKEY> comparator,
                    Class<INKEY> keyClass, Class<INVALUE> valueClass
) throws IOException, InterruptedException {
  org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE> 
  reduceContext = 
    new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(job, taskId, 
                                                            rIter, 
                                                            inputKeyCounter, 
                                                            inputValueCounter, 
                                                            output, 
                                                            committer, 
                                                            reporter, 
                                                            comparator, 
                                                            keyClass, 
                                                            valueClass);

  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context 
      reducerContext = 
        new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(
            reduceContext);

  return reducerContext;
}
Project: hadoop    File: Merger.java
public MergeQueue(Configuration conf, FileSystem fs, 
                  Path[] inputs, boolean deleteInputs, 
                  CompressionCodec codec, RawComparator<K> comparator,
                  Progressable reporter, 
                  Counters.Counter mergedMapOutputsCounter,
                  TaskType taskType) 
throws IOException {
  this.conf = conf;
  this.fs = fs;
  this.codec = codec;
  this.comparator = comparator;
  this.reporter = reporter;

  if (taskType == TaskType.MAP) {
    considerFinalMergeForProgress();
  }

  for (Path file : inputs) {
    LOG.debug("MergeQ: adding: " + file);
    segments.add(new Segment<K, V>(conf, fs, file, codec, !deleteInputs, 
                                   (file.toString().endsWith(
                                       Task.MERGED_OUTPUT_PREFIX) ? 
                                    null : mergedMapOutputsCounter)));
  }

  // Sort segments on file-lengths
  Collections.sort(segments, segmentComparator); 
}
Project: hadoop    File: Merger.java
public MergeQueue(Configuration conf, FileSystem fs, 
    List<Segment<K, V>> segments, RawComparator<K> comparator,
    Progressable reporter, boolean sortSegments, TaskType taskType) {
  this.conf = conf;
  this.fs = fs;
  this.comparator = comparator;
  this.segments = segments;
  this.reporter = reporter;
  if (taskType == TaskType.MAP) {
    considerFinalMergeForProgress();
  }
  if (sortSegments) {
    Collections.sort(segments, segmentComparator);
  }
}
Project: hadoop    File: Merger.java
public MergeQueue(Configuration conf, FileSystem fs,
    List<Segment<K, V>> segments, RawComparator<K> comparator,
    Progressable reporter, boolean sortSegments, CompressionCodec codec,
    TaskType taskType) {
  this(conf, fs, segments, comparator, reporter, sortSegments,
      taskType);
  this.codec = codec;
}
Project: hadoop    File: ReduceTask.java
public ReduceValuesIterator (RawKeyValueIterator in,
                             RawComparator<KEY> comparator, 
                             Class<KEY> keyClass,
                             Class<VALUE> valClass,
                             Configuration conf, Progressable reporter)
  throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
}
Project: hadoop    File: JobConf.java
/**
 * Get the {@link RawComparator} comparator used to compare keys.
 * 
 * @return the {@link RawComparator} comparator used to compare keys.
 */
public RawComparator getOutputKeyComparator() {
  Class<? extends RawComparator> theClass = getClass(
    JobContext.KEY_COMPARATOR, null, RawComparator.class);
  if (theClass != null)
    return ReflectionUtils.newInstance(theClass, this);
  return WritableComparator.get(getMapOutputKeyClass().asSubclass(WritableComparable.class), this);
}
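
A hedged sketch of the setter side: the comparator that getOutputKeyComparator resolves is normally registered on the JobConf before submission (the driver class is illustrative; DescendingIntComparator is the sketch from the top of this page):

JobConf job = new JobConf(WordSort.class);  // illustrative driver class
// Sort map output keys with a custom RawComparator instead of the key type's
// registered WritableComparator.
job.setOutputKeyComparatorClass(DescendingIntComparator.class);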
Project: hadoop    File: JobConf.java
/**
 * Get the user defined {@link WritableComparable} comparator for
 * grouping keys of inputs to the combiner.
 *
 * @return comparator set by the user for grouping values.
 * @see #setCombinerKeyGroupingComparator(Class) for details.
 */
public RawComparator getCombinerKeyGroupingComparator() {
  Class<? extends RawComparator> theClass = getClass(
      JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
  if (theClass == null) {
    return getOutputKeyComparator();
  }

  return ReflectionUtils.newInstance(theClass, this);
}
Project: hadoop    File: JobConf.java
/** 
 * Get the user defined {@link WritableComparable} comparator for 
 * grouping keys of inputs to the reduce.
 * 
 * @return comparator set by the user for grouping values.
 * @see #setOutputValueGroupingComparator(Class) for details.
 */
public RawComparator getOutputValueGroupingComparator() {
  Class<? extends RawComparator> theClass = getClass(
    JobContext.GROUP_COMPARATOR_CLASS, null, RawComparator.class);
  if (theClass == null) {
    return getOutputKeyComparator();
  }

  return ReflectionUtils.newInstance(theClass, this);
}
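
Both grouping getters above fall back to getOutputKeyComparator when nothing is configured. A hedged sketch of the matching setters (the driver and grouping comparator classes are illustrative):

JobConf job = new JobConf(WordSort.class);  // illustrative driver class
// Secondary-sort style grouping: group combiner and reducer input values with a
// custom RawComparator; when unset, the output key comparator is used instead.
job.setCombinerKeyGroupingComparator(FirstFieldGroupingComparator.class);
job.setOutputValueGroupingComparator(FirstFieldGroupingComparator.class);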
Project: hadoop    File: TotalOrderPartitioner.java
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
  try {
    this.conf = conf;
    String parts = getPartitionFile(conf);
    final Path partFile = new Path(parts);
    final FileSystem fs = (DEFAULT_PATH.equals(parts))
      ? FileSystem.getLocal(conf)     // assume in DistributedCache
      : partFile.getFileSystem(conf);

    Job job = Job.getInstance(conf);
    Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
    K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
    if (splitPoints.length != job.getNumReduceTasks() - 1) {
      throw new IOException("Wrong number of partitions in keyset");
    }
    RawComparator<K> comparator =
      (RawComparator<K>) job.getSortComparator();
    for (int i = 0; i < splitPoints.length - 1; ++i) {
      if (comparator.compare(splitPoints[i], splitPoints[i+1]) >= 0) {
        throw new IOException("Split points are out of order");
      }
    }
    boolean natOrder =
      conf.getBoolean(NATURAL_ORDER, true);
    if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
      partitions = buildTrie((BinaryComparable[])splitPoints, 0,
          splitPoints.length, new byte[0],
          // Now that blocks of identical splitless trie nodes are 
          // represented reentrantly, and we develop a leaf for any trie
          // node with only one split point, the only reason for a depth
          // limit is to refute stack overflow or bloat in the pathological
          // case where the split points are long and mostly look like bytes 
          // iii...iixii...iii   .  Therefore, we make the default depth
          // limit large but not huge.
          conf.getInt(MAX_TRIE_DEPTH, 200));
    } else {
      partitions = new BinarySearchNode(splitPoints, comparator);
    }
  } catch (IOException e) {
    throw new IllegalArgumentException("Can't read partitions file", e);
  }
}
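
A hedged end-to-end sketch of how setConf above is usually fed: the partition file is registered on the job and written by InputSampler before tasks run, sorted with the job's sort comparator and holding numReduceTasks - 1 keys (the path, reduce count, and sampler parameters are illustrative):

Job job = Job.getInstance(conf, "total-order-sort");  // conf assumed to carry input/output settings
job.setNumReduceTasks(10);
job.setMapOutputKeyClass(Text.class);
job.setPartitionerClass(TotalOrderPartitioner.class);

// Illustrative partition file location; must be readable by all tasks.
Path partitionFile = new Path("/tmp/_partitions");
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
// Sample the input to produce numReduceTasks - 1 sorted split points.
InputSampler.writePartitionFile(job,
    new InputSampler.RandomSampler<Text, Text>(0.01, 1000, 10));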