Java class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner: example source code
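HashPartitioner is the default Partitioner of the MapReduce API: it assigns each record to a reduce partition based on the key's hashCode(). As a quick reference before the per-project examples below, its core logic amounts to the following sketch (a paraphrase, not a copy of any particular Hadoop release):

import org.apache.hadoop.mapreduce.Partitioner;

// Sketch of the core partitioning logic: mask off the sign bit so the result is
// non-negative, then take the value modulo the number of reduce tasks.
public class HashPartitionerSketch<K, V> extends Partitioner<K, V> {
    @Override
    public int getPartition(K key, V value, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}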

Project: hadoop-in-action    File: LookupRecordByTemperature.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));

    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    Text val = new Text();
    Writable entry = MapFileOutputFormat.getEntry(readers, partitioner,
            key, val);
    if (entry == null) {
        System.err.println("Key not found: " + key);
        return -1;
    }
    NcdcRecordParser parser = new NcdcRecordParser();
    parser.parse(val.toString());
    System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
    return 0;
}
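MapFileOutputFormat.getEntry() hides the reader selection performed above. Roughly speaking (and mirrored by the manual variant in LookupRecordsByTemperature further down), it hashes the key to one of the output MapFiles and looks the key up there. A small sketch of that lookup, assuming the readers were written with the same partitioner and partition count:

import java.io.IOException;

import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Partitioner;

public final class MapFileLookupSketch {
    private MapFileLookupSketch() {
    }

    // Sketch only: pick the reader for the key's partition, then query that single
    // MapFile. Returns null when the key is absent, as handled in the example above.
    public static <K extends WritableComparable<?>, V extends Writable> Writable getEntry(
            MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value) throws IOException {
        int partition = partitioner.getPartition(key, value, readers.length);
        return readers[partition].get(key, value);
    }
}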
Project: incubator-blur    File: TableShardCountCollapserTest.java
private void assertData(int totalShardCount) throws IOException {
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  for (int i = 0; i < totalShardCount; i++) {
    HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
    DirectoryReader reader = DirectoryReader.open(directory);
    int numDocs = reader.numDocs();
    for (int d = 0; d < numDocs; d++) {
      Document document = reader.document(d);
      IndexableField field = document.getField("id");
      Integer id = (Integer) field.numericValue();
      int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
      assertEquals(i, partition);
    }
    reader.close();
  }
}
Project: incubator-blur    File: TableShardCountCollapserTest.java
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);

  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
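Both incubator-blur helpers above rely on IntWritable.hashCode() returning the wrapped int itself, so for 0 <= i < totalShardCount the key i always lands in partition i. A small standalone check of that property (the shard count of 8 is an arbitrary illustrative choice):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class ShardPartitionCheck {
    public static void main(String[] args) {
        int totalShardCount = 8; // arbitrary count for the demonstration
        HashPartitioner<IntWritable, IntWritable> partitioner =
                new HashPartitioner<IntWritable, IntWritable>();
        for (int i = 0; i < totalShardCount; i++) {
            // IntWritable.hashCode() is the wrapped value, so key i maps to partition i.
            int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
            System.out.println(i + " -> " + partition);
        }
    }
}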
Project: lembos    File: LembosMapReduceRunnerTest.java
/**
 * Test {@link LembosMapReduceRunner#initJob(String[])} works as expected for a map only job.
 *
 * @throws Exception if anything goes wrong
 */
@Test
public void testMapOnlyJob() throws Exception {
    final String moduleName = "LembosMapReduceRunnerTest-testMapOnlyJob";
    final String modulePath = TestUtils.getModulePath(moduleName);
    final Job job = getJob(moduleName, modulePath, null, null);

    assertNotNull(job.getMapperClass());
    assertNull(job.getCombinerClass());
    // assertNull(job.getGroupingComparator()); // Throws an exception because our map output key is
                                                // WritableComparable and can't subclass itself
    assertEquals(HashPartitioner.class, job.getPartitionerClass());
    assertEquals(Reducer.class, job.getReducerClass()); // Defaults to the Hadoop Reducer
    // assertNull(job.getSortComparator()); // Throws an exception because our map output key is
                                            // WritableComparable and can't subclass itself

    assertNull(job.getConfiguration().get("boolean"));
    assertNull(job.getConfiguration().get("double"));
    assertNull(job.getConfiguration().get("float"));
    assertNull(job.getConfiguration().get("int"));
    assertNull(job.getConfiguration().get("long"));
    assertNull(job.getConfiguration().get("string"));
}
Project: hadoop    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
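The JobContextImpl/JobContext variants that follow all implement the same fallback: when nothing has been stored under PARTITIONER_CLASS_ATTR, getPartitionerClass() returns HashPartitioner.class. A minimal sketch of how that default interacts with an explicit Job.setPartitionerClass() call (TotalOrderPartitioner is used only as an arbitrary stand-in for a custom partitioner):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class PartitionerDefaultDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "partitioner-default-demo");

        // Nothing configured yet, so the getter falls back to HashPartitioner.
        System.out.println(job.getPartitionerClass());

        // Setting a partitioner stores it under the same configuration key that
        // getPartitionerClass() reads back.
        job.setPartitionerClass(TotalOrderPartitioner.class);
        System.out.println(job.getPartitionerClass());
    }
}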
Project: aliyun-oss-hadoop-fs    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: big-c    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-2.6.0-cdh5.4.3    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-EAR    File: JobContext.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-plus    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: FlexMap    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hops    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: pagerank-hadoop    File: PageRankTopNReducer.java
@Override
protected void cleanup(Context context) throws IOException,
        InterruptedException {

    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("pagerank.titles_dir"));

    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();

    float[] pageRanks = new float[topN.size()];
    String[] titles = new String[topN.size()];

    // The order of the entries is reversed. The priority queue is in
    // non-decreasing order and we want the highest PageRank first.
    for (int i = pageRanks.length - 1; i >= 0; i--) {
        Map.Entry<Float, Integer> entry = topN.poll();
        // Get the title of the page from the title index.
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        pageRanks[i] = entry.getKey();
        titles[i] = title.toString();
    }

    for (MapFile.Reader reader : readers) {
        reader.close();
    }

    for (int i = 0; i < pageRanks.length; i++) {
        context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
    }
}
Project: pagerank-hadoop    File: InLinksTopNReducer.java
@Override
protected void cleanup(Context context) throws IOException,
        InterruptedException {

    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("inlinks.titles_dir"));

    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();

    int[] inLinks = new int[topN.size()];
    String[] titles = new String[topN.size()];

    for (int i = inLinks.length - 1; i >= 0; i--) {
        Map.Entry<Integer, Integer> entry = topN.poll();
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        inLinks[i] = entry.getKey();
        titles[i] = title.toString();
    }

    for (MapFile.Reader reader : readers) {
        reader.close();
    }

    for (int i = 0; i < inLinks.length; i++) {
        context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
    }
}
Project: hadoop-in-action    File: LookupRecordsByTemperature.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));

    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    Text val = new Text();

    Reader reader = readers[partitioner.getPartition(key, val,
            readers.length)];
    Writable entry = reader.get(key, val);
    if (entry == null) {
        System.err.println("Key not found: " + key);
        return -1;
    }
    NcdcRecordParser parser = new NcdcRecordParser();
    IntWritable nextKey = new IntWritable();
    do {
        parser.parse(val.toString());
        System.out.printf("%s\t%s\n", parser.getStationId(),
                parser.getYear());
    } while (reader.next(nextKey, val) && key.equals(nextKey));
    return 0;
}
Project: hadoop-TCP    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-on-lustre    File: JobContext.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hardfs    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-on-lustre2    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: RDFS    File: JobContext.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-0.20    File: JobContext.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: incubator-tez    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hanoi-hadoop-2.0.0-cdh    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: mapreduce-fork    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: tez    File: JobContextImpl.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: bigdata_pattern    File: MinimalMapReduceWithDefaults.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    /* by default start */
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Mapper.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setPartitionerClass(HashPartitioner.class);

    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    /* by default end */

    return job.waitForCompletion(true) ? 0 : 1;
}
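Every call between the /* by default */ markers above sets a value the framework would pick anyway, HashPartitioner included, so a driver that omits the whole block behaves identically. The stripped-down equivalent would look roughly like this (same Tool-style run() method assumed):

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    // Input/output format, mapper, reducer, key/value classes and the partitioner
    // are all left at their defaults, matching the explicit calls above.
    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}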
Project: hortonworks-extension    File: JobContext.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: hadoop-gpu    File: JobContext.java
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
Project: asakusafw-compiler    File: MapReduceStageEmitterTest.java
/**
 * w/ reduce.
 * @throws Exception if failed
 */
@Test
public void reduce() throws Exception {
    FileEditor.put(new File(folder.getRoot(), "input/test.txt"), "Hello, world!");
    Path root = new Path(folder.getRoot().toURI());
    Path base = new Path(root, "output");
    ClassDescription client = new ClassDescription("com.example.StageClient");
    MapReduceStageInfo info = new MapReduceStageInfo(
            new StageInfo("simple", "simple", "simple"),
            Arrays.asList(new MapReduceStageInfo.Input(
                    new Path(root, "input/*.txt").toString(),
                    classOf(Text.class),
                    classOf(TextInputFormat.class),
                    classOf(Mapper.class),
                    Collections.emptyMap())),
            Arrays.asList(new MapReduceStageInfo.Output(
                    "out",
                    classOf(NullWritable.class),
                    classOf(Text.class),
                    classOf(TextOutputFormat.class),
                    Collections.emptyMap())),
            Collections.emptyList(),
            new MapReduceStageInfo.Shuffle(
                    classOf(LongWritable.class),
                    classOf(Text.class),
                    classOf(HashPartitioner.class),
                    null,
                    classOf(LongWritable.Comparator.class),
                    classOf(LongWritable.Comparator.class),
                    classOf(SimpleReducer.class)),
            base.toString());
    MapReduceStageEmitter.emit(client, info, javac);
    int status = MapReduceRunner.execute(
            new Configuration(),
            client,
            "testing",
            Collections.emptyMap(),
            javac.compile());
    assertThat("exit status code", status, is(0));
    assertThat(collect("output"), contains("Hello, world!"));
}
Project: pagerank-hadoop    File: PageRankIterationMapper.java
@Override
public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue,
        Context context) throws IOException, InterruptedException {

    // This task gets each block M_{i,j}, loads the corresponding stripe j
    // of the vector v_{k-1} and produces the partial result of the stripe i
    // of the vector v_k.

    Configuration conf = context.getConfiguration();
    int iter = Integer.parseInt(conf.get("pagerank.iteration"));
    int numPages = Integer.parseInt(conf.get("pagerank.num_pages"));
    short blockSize = Short.parseShort(conf.get("pagerank.block_size"));

    Writable[] blockIndexes = inKey.get();
    short i = ((ShortWritable) blockIndexes[0]).get();
    short j = ((ShortWritable) blockIndexes[1]).get();

    int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize;
    FloatWritable[] vj = new FloatWritable[vjSize];

    if (iter == 1) {
        // Initial PageRank vector with 1/n for all pages.
        for (int k = 0; k < vj.length; k++) {
            vj[k] = new FloatWritable(1.0f / numPages);
        }
    } else {
        // Load the stripe j of the vector v_{k-1} from the MapFiles.
        Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent();
        Path vjDir = new Path(outputDir, "v" + (iter - 1));
        MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf);
        Partitioner<ShortWritable, FloatArrayWritable> partitioner =
                new HashPartitioner<ShortWritable, FloatArrayWritable>();
        ShortWritable key = new ShortWritable(j);
        FloatArrayWritable value = new FloatArrayWritable();
        MapFileOutputFormat.getEntry(readers, partitioner, key, value);
        Writable[] writables = value.get();
        for (int k = 0; k < vj.length; k++) {
            vj[k] = (FloatWritable) writables[k];
        }
        for (MapFile.Reader reader : readers) {
            reader.close();
        }
    }

    // Initialize the partial result i of the vector v_k.
    int viSize = (i > numPages / blockSize) ? (numPages % blockSize) : blockSize;
    FloatWritable[] vi = new FloatWritable[viSize];
    for (int k = 0; k < vi.length; k++) {
        vi[k] = new FloatWritable(0);
    }

    // Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the
    // partial result i of the vector v_k.
    Writable[][] blockColumns = inValue.get();
    for (int k = 0; k < blockColumns.length; k++) {
        Writable[] blockColumn = blockColumns[k];
        if (blockColumn.length > 0) {
            int vDegree = ((ShortWritable) blockColumn[0]).get();
            for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) {
                int l = ((ShortWritable) blockColumn[columnIndex]).get();
                vi[l].set(vi[l].get() +  (1.0f / vDegree) * vj[k].get());
            }
        }
    }

    context.write(new ShortWritable(i), new FloatArrayWritable(vi));
}
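For reference, each map() call above produces one partial block product of the power-iteration step; the partial stripes are expected to be summed over j downstream (that reducer is not shown here):

    v_k(i) = sum over j of M_{i,j} * v_{k-1}(j)

where each nonzero entry of M_{i,j} is 1/deg(source page), which is the (1.0f / vDegree) factor applied in the inner loop.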
Project: spork    File: TezCompiler.java
@Override
public void visitRank(PORank op) throws VisitorException {
    try{
        // Rank implementation has 3 vertices
        // Vertex 1 has POCounterTez produce output tuples and send to Vertex 3 via 1-1 edge.
        // Vertex 1 also sends the count of tuples of each task in Vertex 1 to Vertex 2 which is a single reducer.
        // Vertex 3 has PORankTez which consumes from Vertex 2 as broadcast input and also tuples from Vertex 1 and
        // produces tuples with updated ranks based on the count of tuples from Vertex 2.
        // This is different from MR implementation where POCounter updates job counters, and that is
        // copied by JobControlCompiler into the PORank job's jobconf.

        // Previous operator is always POCounterTez (Vertex 1)
        TezOperator counterOper = curTezOp;
        POCounterTez counterTez = (POCounterTez) counterOper.plan.getLeaves().get(0);

        //Construct Vertex 2
        TezOperator statsOper = getTezOp();
        tezPlan.add(statsOper);
        POCounterStatsTez counterStatsTez = new POCounterStatsTez(OperatorKey.genOpKey(scope));
        statsOper.plan.addAsLeaf(counterStatsTez);
        statsOper.setRequestedParallelism(1);
        statsOper.setDontEstimateParallelism(true);

        //Construct Vertex 3
        TezOperator rankOper = getTezOp();
        tezPlan.add(rankOper);
        PORankTez rankTez = new PORankTez(op);
        rankOper.plan.addAsLeaf(rankTez);
        curTezOp = rankOper;

        // Connect counterOper vertex to rankOper vertex by 1-1 edge
        rankOper.setRequestedParallelismByReference(counterOper);
        TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, counterOper, rankOper);
        rankOper.setUseMRMapSettings(counterOper.isUseMRMapSettings());
        TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
        counterTez.setTuplesOutputKey(rankOper.getOperatorKey().toString());
        rankTez.setTuplesInputKey(counterOper.getOperatorKey().toString());

        // Connect counterOper vertex to statsOper vertex by Shuffle edge
        edge = TezCompilerUtil.connect(tezPlan, counterOper, statsOper);
        // Task id
        edge.setIntermediateOutputKeyClass(IntWritable.class.getName());
        edge.partitionerClass = HashPartitioner.class;
        // Number of records in that task
        edge.setIntermediateOutputValueClass(LongWritable.class.getName());
        counterTez.setStatsOutputKey(statsOper.getOperatorKey().toString());
        counterStatsTez.setInputKey(counterOper.getOperatorKey().toString());

        // Connect statsOper vertex to rankOper vertex by Broadcast edge
        edge = TezCompilerUtil.connect(tezPlan, statsOper, rankOper);
        // Map of task id, offset count based on total number of records is in the value
        TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
        counterStatsTez.setOutputKey(rankOper.getOperatorKey().toString());
        rankTez.setStatsInputKey(statsOper.getOperatorKey().toString());

        phyToTezOpMap.put(op, rankOper);
    } catch (Exception e) {
        int errCode = 2034;
        String msg = "Error compiling operator " + op.getClass().getSimpleName();
        throw new TezCompilerException(msg, errCode, PigException.BUG, e);
    }
}
Project: oryx    File: JobStep.java
protected final GroupingOptions groupWithComparator(
    Class<? extends RawComparator<?>> comparator) {
  return groupingOptions(HashPartitioner.class, comparator);
}
Project: zieook    File: RowSimilarityZieOok.java
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{

    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix");
    addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use "
            + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
    addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: "
            + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null)
    {
        return -1;
    }

    int numberOfColumns = Integer.parseInt(parsedArgs.get("--numberOfColumns"));
    String similarityClassnameArg = parsedArgs.get("--similarityClassname");
    String distributedSimilarityClassname;
    try
    {
        distributedSimilarityClassname = SimilarityType.valueOf(similarityClassnameArg)
                .getSimilarityImplementationClassName();
    }
    catch (IllegalArgumentException iae)
    {
        distributedSimilarityClassname = similarityClassnameArg;
    }

    int maxSimilaritiesPerRow = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerRow"));

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();
    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));

    Path weightsPath = new Path(tempDirPath, "weights");
    Path pairwiseSimilarityPath = new Path(tempDirPath, "pairwiseSimilarity");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase))
    {
        Job weights = prepareJob(inputPath, weightsPath, SequenceFileInputFormat.class, RowWeightMapper.class,
                VarIntWritable.class, WeightedOccurrence.class, WeightedOccurrencesPerColumnReducer.class,
                VarIntWritable.class, WeightedOccurrenceArray.class, SequenceFileOutputFormat.class);

        weights.getConfiguration().set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
        weights.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase))
    {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, SequenceFileInputFormat.class,
                CooccurrencesMapper.class, WeightedRowPair.class, Cooccurrence.class, SimilarityReducer.class,
                SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class,
                SequenceFileOutputFormat.class);

        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
        pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
        pairwiseSimilarity.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase))
    {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, outputPath, SequenceFileInputFormat.class, Mapper.class,
                SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class,
                EntriesToVectorsReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
        asMatrix.setPartitionerClass(HashPartitioner.class);
        asMatrix.setGroupingComparatorClass(SimilarityMatrixEntryKey.SimilarityMatrixEntryKeyGroupingComparator.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        asMatrix.waitForCompletion(true);
    }

    return 0;
}
Project: applications    File: RandomGraphGenerator.java
public static void main(String[] args) throws Exception {
  if (args.length != 4) {
    System.out
        .println("USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
    return;
  }
  System.out.println(Arrays.toString(args));
  Configuration conf = new Configuration();
  conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
  conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
  conf.setInt("number.edges", Integer.parseInt(args[1]));
  Job job = new Job(conf);

  Path generated = new Path(new Path(args[3]).getParent(), "generated");
  FileOutputFormat.setOutputPath(job, generated);
  FileSystem.get(conf).delete(generated, true);

  job.setJobName("RangeWriter");

  job.setJarByClass(SortGenMapper.class);
  job.setMapperClass(SortGenMapper.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  job.setInputFormatClass(RangeInputFormat.class);

  job.waitForCompletion(true);
  conf.setInt("max.id", Integer.valueOf(args[0]));
  job = new Job(conf);

  FileOutputFormat.setOutputPath(job, new Path(args[3]));
  FileSystem.get(conf).delete(new Path(args[3]), true);

  job.setJobName("Random Vertex Writer");

  FileInputFormat.addInputPath(job, generated);

  job.setJarByClass(RandomMapper.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
  job.setPartitionerClass(HashPartitioner.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.waitForCompletion(true);
}