Example source code for the Java class org.apache.hadoop.mapreduce.Partitioner
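
The snippets below are collected from open-source projects that use org.apache.hadoop.mapreduce.Partitioner. For orientation, here is a minimal custom partitioner sketch; the class name and key/value types are illustrative, but the getPartition contract (return a value in [0, numPartitions)) is the one every snippet on this page relies on.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/** Illustrative sketch only: send negative keys to partition 0, hash the rest. */
public class SignAwarePartitioner extends Partitioner<IntWritable, Text> {
  @Override
  public int getPartition(IntWritable key, Text value, int numPartitions) {
    if (numPartitions == 1 || key.get() < 0) {
      return 0;
    }
    // Mask the sign bit so the result stays non-negative, as HashPartitioner does.
    return 1 + (key.hashCode() & Integer.MAX_VALUE) % (numPartitions - 1);
  }
}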

Project: gora-boot    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job. All of 
 * the records in the dataStore are used as the input. If you want to 
 * include a specific subset, use one of the overloaded methods that takes a
 * query parameter.
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass, 
    Class<V1> inValueClass,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {

  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: gora-boot    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job
 * @param job the job to set the properties for
 * @param query the query to get the inputs from
 * @param dataStore the datastore as the input
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job, 
    Query<K1,V1> query,
    DataStore<K1,V1> dataStore,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {
  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, query, dataStore, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
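
A usage sketch for the two overloads above. Only the initMapperJob signature comes from the snippets; WebPage (a Gora-generated bean) and MyMapper (a GoraMapper subclass) are assumed placeholders.

import java.io.IOException;
import org.apache.gora.examples.generated.WebPage; // assumed Gora-generated bean
import org.apache.gora.query.Query;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public static Job buildJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, "gora-partitioner-example");
  DataStore<String, WebPage> store =
      DataStoreFactory.getDataStore(String.class, WebPage.class, conf);
  Query<String, WebPage> query = store.newQuery();
  GoraMapper.initMapperJob(job, query, store,
      Text.class, LongWritable.class, // map output key/value classes
      MyMapper.class,                 // assumed GoraMapper subclass
      null,                           // no partitioner class: the job default is kept
      true);                          // reuse objects during deserialization
  return job;
}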
Project: ignite    File: HadoopTeraSortTest.java
/**
 * Extracts package-private TeraSort total order partitioner class.
 *
 * @return The class.
 */
@SuppressWarnings("unchecked")
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
    Class[] classes = TeraSort.class.getDeclaredClasses();

    Class<? extends Partitioner> totalOrderPartitionerCls = null;

    for (Class<?> x: classes) {
        if ("TotalOrderPartitioner".equals(x.getSimpleName())) {
            totalOrderPartitionerCls = (Class<? extends Partitioner>)x;

            break;
        }
    }

    if (totalOrderPartitionerCls == null)
        throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");

    return totalOrderPartitionerCls;
}
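
The extracted class is then installed on the benchmark job in the usual way, e.g. job.setPartitionerClass(getTeraSortTotalOrderPartitioner()); (a sketch; the surrounding test code is not shown here).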
Project: hadoop-in-action    File: LookupRecordByTemperature.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));

    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    Text val = new Text();
    Writable entry = MapFileOutputFormat.getEntry(readers, partitioner,
            key, val);
    if (entry == null) {
        System.err.println("Key not found: " + key);
        return -1;
    }
    NcdcRecordParser parser = new NcdcRecordParser();
    parser.parse(val.toString());
    System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
    return 0;
}
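
Note that this lookup works only because the HashPartitioner used here matches the partitioner of the job that wrote the map files (HashPartitioner is also the MapReduce default): getEntry recomputes the key's partition to pick a reader, so a mismatched partitioner would consult the wrong map file.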
Project: gora    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job. All of
 * the records in the dataStore are used as the input. If you want to
 * include a specific subset, use one of the overloaded methods that takes a
 * query parameter.
 *
 * @param job              the job to set the properties for
 * @param dataStoreClass   the datastore class
 * @param inKeyClass       Map input key class
 * @param inValueClass     Map input value class
 * @param outKeyClass      Map output key class
 * @param outValueClass    Map output value class
 * @param mapperClass      the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects     whether to reuse objects in serialization
 * @param <K1> Map input key class
 * @param <V1> Map input value class
 * @param <K2> Map output key class
 * @param <V2> Map output value class
 * @throws IOException if there is an error initializing the Map job
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass, 
    Class<V1> inValueClass,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {

  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: gora    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job
 *
 * @param job              the job to set the properties for
 * @param query            the query to get the inputs from
 * @param outKeyClass      Map output key class
 * @param outValueClass    Map output value class
 * @param mapperClass      the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects     whether to reuse objects in serialization
 * @param <K1> Map input key class
 * @param <V1> Map input value class
 * @param <K2> Map output key class
 * @param <V2> Map output value class
 * @throws IOException if there is an error initializing the Map job
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job, 
    Query<K1,V1> query,
    Class<K2> outKeyClass,
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {
  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, query, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: gora-0.3-simplified    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job. All of 
 * the records in the dataStore are used as the input. If you want to 
 * include a specific subset, use one of the overloaded methods that takes a
 * query parameter.
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass, 
    Class<V1> inValueClass,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {

  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: gora-0.3-simplified    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job
 * @param job the job to set the properties for
 * @param query the query to get the inputs from
 * @param dataStore the datastore as the input
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job, 
    Query<K1,V1> query,
    DataStore<K1,V1> dataStore,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {
  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, query, dataStore, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: incubator-blur    File: TableShardCountCollapserTest.java
private void assertData(int totalShardCount) throws IOException {
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  for (int i = 0; i < totalShardCount; i++) {
    HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
    DirectoryReader reader = DirectoryReader.open(directory);
    int numDocs = reader.numDocs();
    for (int d = 0; d < numDocs; d++) {
      Document document = reader.document(d);
      IndexableField field = document.getField("id");
      Integer id = (Integer) field.numericValue();
      int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
      assertEquals(i, partition);
    }
    reader.close();
  }
}
Project: incubator-blur    File: TableShardCountCollapserTest.java
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);

  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
Project: gora-oraclenosql    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job. All of 
 * the records in the dataStore are used as the input. If you want to 
 * include a specific subset, use one of the overloaded methods that takes a
 * query parameter.
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass, 
    Class<V1> inValueClass,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {

  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: gora-oraclenosql    File: GoraMapper.java
/**
 * Initializes the Mapper, and sets input parameters for the job
 * @param job the job to set the properties for
 * @param query the query to get the inputs from
 * @param dataStore the datastore as the input
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job, 
    Query<K1,V1> query,
    DataStore<K1,V1> dataStore,
    Class<K2> outKeyClass, 
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass, 
    boolean reuseObjects) throws IOException {
  //set the input via GoraInputFormat
  GoraInputFormat.setInput(job, query, dataStore, reuseObjects);

  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);

  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Project: oryx    File: JobStep.java
protected final GroupingOptions groupingOptions(
    Class<? extends Partitioner> partitionerClass,
    Class<? extends RawComparator<?>> groupingComparator,
    Class<? extends RawComparator<?>> sortComparator) {
  GroupingOptions.Builder b = GroupingOptions.builder()
      .partitionerClass(partitionerClass)
      .numReducers(getNumReducers());

  if (groupingComparator != null) {
    b.groupingComparatorClass(groupingComparator);
  }
  if (sortComparator != null) {
    b.sortComparatorClass(sortComparator);
  }
  return b.build();
}
Project: hadoop    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Project: hadoop    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
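
The setter side, as a small sketch; the fallback above means a job that never calls setPartitionerClass still partitions by key hash:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class PartitionerWiring {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // Nothing set yet: getPartitionerClass() falls back to HashPartitioner.
    System.out.println(job.getPartitionerClass().getSimpleName()); // HashPartitioner
    job.setPartitionerClass(TotalOrderPartitioner.class);
    System.out.println(job.getPartitionerClass().getSimpleName()); // TotalOrderPartitioner
  }
}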
Project: aliyun-oss-hadoop-fs    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int readerLength = readers.length;
  int part;
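  // With a single map file there is only one candidate reader, so the
  // partitioner need not be consulted.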
  if (readerLength <= 1) {
    part = 0;
  } else {
    part = partitioner.getPartition(key, value, readers.length);
  }
  return readers[part].get(key, value);
}
Project: aliyun-oss-hadoop-fs    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: elephant56    File: GlobalDistributedDriver.java
public Job createJob(
        Configuration configuration,
        int numberOfNodes,
        long currentGenerationNumber,
        String generationNameFormat,
        Path currentGenerationsBlockReportsFolderPath,
        Schema individualWrapperSchema
) throws IOException {
    // Creates a job.
    Job job = super.createJob(configuration, numberOfNodes, currentGenerationNumber, currentGenerationNumber,
            (currentGenerationNumber - 1L), currentGenerationNumber, generationNameFormat,
            currentGenerationsBlockReportsFolderPath, individualWrapperSchema,
            GlobalMapper.class, Partitioner.class, Reducer.class);

    // Sets the input.
    NodesInputFormat.setInputPopulationFolderPath(job, this.getInputFolderPath());
    NodesInputFormat.activateInitialisation(job, false);

    // Configures the fitness value class.
    job.getConfiguration().setClass(Constants.CONFIGURATION_FITNESS_VALUE_CLASS, this.fitnessValueClass,
            FitnessValue.class);

    // Configures the Fitness Evaluation phase.
    job.getConfiguration().setClass(Constants.CONFIGURATION_FITNESS_EVALUATION_CLASS, this.fitnessEvaluationClass,
            FitnessEvaluation.class);

    // Disables the reducer.
    job.setNumReduceTasks(0);

    // Returns the job.
    return job;
}
Project: big-c    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Project: big-c    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: LCIndex-HBase-0.94.16    File: HFileOutputFormat.java
/**
 * On Hadoop releases newer than 0.20 we want to use Hadoop's own
 * TotalOrderPartitioner; on 0.20.x we have to use the copy that HBase bundles
 * under hadoopbackport. This method exists so that HBase can run on either
 * version of Hadoop.
 * @return the TotalOrderPartitioner class to use
 * @throws ClassNotFoundException if no TotalOrderPartitioner class can be found
 */
private static Class<? extends Partitioner> getTotalOrderPartitionerClass()
throws ClassNotFoundException {
  Class<? extends Partitioner> clazz = null;
  try {
    clazz = (Class<? extends Partitioner>) Class.forName("org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner");
  } catch (ClassNotFoundException e) {
    clazz =
      (Class<? extends Partitioner>) Class.forName("org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner");
  }
  return clazz;
}
Project: hadoop-2.6.0-cdh5.4.3    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Project: hadoop-2.6.0-cdh5.4.3    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-plus    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Project: hadoop-plus    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: Gaffer    File: AccumuloAddElementsFromHdfsJobFactoryTest.java
private void setupAccumuloPartitionerWithGivenPartitioner(final Class<? extends Partitioner> partitioner) throws IOException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
        writer.write("1");
    }

    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .partitioner(partitioner)
            .useProvidedSplits(true)
            .splitsFilePath(splitsFile)
            .build();
    final AccumuloStore store = mock(AccumuloStore.class);
    given(job.getConfiguration()).willReturn(localConf);

    // When
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    if (NoPartitioner.class.equals(partitioner)) {
        verify(job, never()).setNumReduceTasks(Mockito.anyInt());
        verify(job, never()).setPartitionerClass(Mockito.any(Class.class));
        assertNull(job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
    } else {
        verify(job).setNumReduceTasks(2);
        verify(job).setPartitionerClass(GafferKeyRangePartitioner.class);
        assertEquals(splitsFile, job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
    }
}
Project: Gaffer    File: AddElementsFromHdfsTest.java
@Test
public void shouldJSONSerialiseAndDeserialise() throws SerialisationException {
    // Given
    final Map<String, String> inputMapperPairs = new HashMap<>();
    inputMapperPairs.put("inputPath", MapperGenerator.class.getName());
    final AddElementsFromHdfs addElements = new AddElementsFromHdfs.Builder()
            .inputMapperPairs(inputMapperPairs)
            .outputPath("outputPath")
            .failurePath("failurePath")
            .jobInitialiser(new TextJobInitialiser())
            .partitioner(Partitioner.class)
            .mappers(5)
            .reducers(10)
            .splitsFilePath("/path/to/splits/file")
            .useProvidedSplits(false)
            .build();

    // When
    String json = new String(JSONSerialiser.serialise(addElements, true));

    // Then
    JsonAssert.assertEquals(String.format("{%n" +
            "  \"class\" : \"uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs\",%n" +
            "  \"failurePath\" : \"failurePath\",%n" +
            "  \"validate\" : true,%n" +
            "  \"inputMapperPairs\" : { \"inputPath\" :\"uk.gov.gchq.gaffer.hdfs.operation.mapper.generator.MapperGenerator\"},%n" +
            "  \"outputPath\" : \"outputPath\",%n" +
            "  \"jobInitialiser\" : {%n" +
            "    \"class\" : \"uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser\"%n" +
            "  },%n" +
            "  \"numMapTasks\" : 5,%n" +
            "  \"numReduceTasks\" : 10,%n" +
            "  \"splitsFilePath\" : \"/path/to/splits/file\",%n" +
            "  \"partitioner\" : \"org.apache.hadoop.mapreduce.Partitioner\"%n" +
            "}"), json);
}
Project: terrapin    File: BaseUploader.java
/**
 * Validates that each non-empty partition hfile was written with the right
 * partitioning function. For every non-empty partition it reads the first key,
 * recomputes that key's partition with the client-supplied partitioning
 * function, and throws if the computed partition differs from the actual
 * partition number. If all partition hfiles are empty, an exception is thrown.
 *
 * @param parts full absolute path for all partitions
 * @param partitionerType type of partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards)
    throws IOException {
  boolean hasNonEmptyPartition = false;
  HColumnDescriptor columnDescriptor = new HColumnDescriptor();
  // Disable block cache to ensure it reads the actual file content.
  columnDescriptor.setBlockCacheEnabled(false);
  for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
    Path fileToBeValidated = parts.get(shardIndex);
    HFile.Reader reader = null;
    try {
      FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
      CacheConfig cc = new CacheConfig(conf, columnDescriptor);
      reader = HFile.createReader(fs, fileToBeValidated, cc);
      Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
      byte[] rowKey = reader.getFirstRowKey();
      if (rowKey == null) {
        LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
        reader.close();
        continue;
      }
      hasNonEmptyPartition = true;
      BytesWritable key = new BytesWritable(rowKey);
      int partition = partitioner.getPartition(key, null,  numShards);
      if (partition != shardIndex) {
        throw new IllegalArgumentException(
            String.format("wrong partition type %s for key %s in partition %d, expected %d",
                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition)
        );
      }
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }
  if (!hasNonEmptyPartition) {
    throw new IllegalArgumentException("all partitions are empty");
  }
}
Project: terrapin    File: PartitionerFactory.java
/**
 * Get the partitioner. For PartitionerType.CASCADING this returns the
 * CascadingPartitioner, for PartitionerType.MODULUS the HashPartitioner;
 * any other type is rejected with a RuntimeException.
 */
public static Partitioner getPartitioner(PartitionerType type) {
  if (type.equals(PartitionerType.CASCADING)) {
    return CASCADING_PARTITIONER;
  } else if (type.equals(PartitionerType.MODULUS)) {
    return HASH_PARTITIONER;
  } else {
    throw new RuntimeException("Unsupported ShardFunction." + type);
  }
}
Project: terrapin    File: TerrapinUtil.java
public static String getPartitionName(ByteBuffer key,
                                      PartitionerType partitionerType,
                                      int numPartitions) {
  Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
  return Integer.toString(
      partitioner.getPartition(
          new BytesWritable(BytesUtil.readBytesFromByteBufferWithoutConsume(key)),
          null,
          numPartitions));
}
Project: terrapin    File: HFileGenerator.java
/**
 * Generate hfiles for testing purpose
 *
 * @param sourceFileSystem source file system
 * @param conf configuration for hfile
 * @param outputFolder output folder for generated hfiles
 * @param partitionerType partitioner type
 * @param numOfPartitions number of partitions
 * @param numOfKeys number of keys
 * @return list of generated hfiles
 * @throws IOException if hfile creation goes wrong
 */
public static List<Path> generateHFiles(FileSystem sourceFileSystem, Configuration conf,
                                        File outputFolder, PartitionerType partitionerType,
                                        int numOfPartitions, int numOfKeys)
    throws IOException {
  StoreFile.Writer[] writers = new StoreFile.Writer[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    writers[i] = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), sourceFileSystem, 4096)
        .withFilePath(new Path(String.format("%s/%s", outputFolder.getAbsoluteFile(),
            TerrapinUtil.formatPartitionName(i))))
        .withCompression(Compression.Algorithm.NONE)
        .build();
  }
  Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
  for (int i = 0; i < numOfKeys; i++) {
    byte[] key = String.format("%06d", i).getBytes();
    byte[] value;
    if (i <= 1) {
      value = "".getBytes();
    } else {
      value = ("v" + (i + 1)).getBytes();
    }
    KeyValue kv = new KeyValue(key, Bytes.toBytes("cf"), Bytes.toBytes(""), value);
    int partition = partitioner.getPartition(new BytesWritable(key), new BytesWritable(value),
        numOfPartitions);
    writers[partition].append(kv);
  }
  for (int i = 0; i < numOfPartitions; i++) {
    writers[i].close();
  }
  return Lists.transform(Lists.newArrayList(writers), new Function<StoreFile.Writer, Path>() {
    @Override
    public Path apply(StoreFile.Writer writer) {
      return writer.getPath();
    }
  });
}
Project: FlexMap    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Project: FlexMap    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hops    File: MapFileOutputFormat.java
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers, 
    Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Project: hops    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: incubator-asterixdb-hyracks    File: HadoopHelper.java
public ITuplePartitionComputerFactory getTuplePartitionComputer() throws HyracksDataException {
    int nReducers = job.getNumReduceTasks();
    try {
        return new HadoopNewPartitionerTuplePartitionComputerFactory<Writable, Writable>(
                (Class<? extends Partitioner<Writable, Writable>>) job.getPartitionerClass(),
                (ISerializerDeserializer<Writable>) DatatypeHelper
                        .createSerializerDeserializer((Class<? extends Writable>) job.getMapOutputKeyClass()),
                (ISerializerDeserializer<Writable>) DatatypeHelper
                        .createSerializerDeserializer((Class<? extends Writable>) job.getMapOutputValueClass()));
    } catch (ClassNotFoundException e) {
        throw new HyracksDataException(e);
    }
}
Project: IRIndex    File: HFileOutputFormat.java
/**
 * On Hadoop releases newer than 0.20 we want to use Hadoop's own
 * TotalOrderPartitioner; on 0.20.x we have to use the copy that HBase bundles
 * under hadoopbackport. This method exists so that HBase can run on either
 * version of Hadoop.
 * @return the TotalOrderPartitioner class to use
 * @throws ClassNotFoundException if no TotalOrderPartitioner class can be found
 */
private static Class<? extends Partitioner> getTotalOrderPartitionerClass()
throws ClassNotFoundException {
  Class<? extends Partitioner> clazz = null;
  try {
    clazz = (Class<? extends Partitioner>) Class.forName("org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner");
  } catch (ClassNotFoundException e) {
    clazz =
      (Class<? extends Partitioner>) Class.forName("org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner");
  }
  return clazz;
}
Project: multireducers    File: MultiPartitioner.java
@SuppressWarnings("unchecked")
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    Class<Partitioner>[] partitionersClass = (Class<Partitioner>[])
            conf.getClasses(CONF_KEY);
    partitioners = new ArrayList<Partitioner<Object, Object>>(partitionersClass.length);
    for (Class<Partitioner> partitionerClass : partitionersClass) {
        partitioners.add(ReflectionUtils.newInstance(partitionerClass, conf));
    }
    numReducers = Ints.asList(conf.getInts(NUM_REDUCERS_KEY));
}
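
For context, a hedged sketch of how a composite partitioner like this can dispatch once configured; only the partitioners and numReducers fields come from the snippet above, and the sub-job index is assumed to be known per record:

// Sketch, not the multireducers source: give each sub-job a disjoint reducer
// range by offsetting the delegate's result with the reducers used so far.
private int dispatch(int subJob, Object key, Object value) {
    int offset = 0;
    for (int i = 0; i < subJob; i++) {
        offset += numReducers.get(i);
    }
    return offset
        + partitioners.get(subJob).getPartition(key, value, numReducers.get(subJob));
}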