Example source code for the Java class org.apache.hadoop.mapreduce.lib.input.MultipleInputs
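The snippets below are collected from a number of open-source projects and show the typical uses of MultipleInputs: reduce-side joins, PageRank-style chained jobs, and jobs that mix input formats. As a quick reference, here is a minimal, self-contained driver in the same spirit; the MultipleInputsExample, FirstMapper, SecondMapper, and TagJoinReducer names are illustrative placeholders and are not taken from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MultipleInputsExample {

    // Tags each record from the first input with "A".
    public static class FirstMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split("\t", 2);
            context.write(new Text(parts[0]), new Text("A\t" + (parts.length > 1 ? parts[1] : "")));
        }
    }

    // Tags each record from the second input with "B".
    public static class SecondMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split("\t", 2);
            context.write(new Text(parts[0]), new Text("B\t" + (parts.length > 1 ? parts[1] : "")));
        }
    }

    // Concatenates the tagged values that share a key (a toy reduce-side join).
    public static class TagJoinReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder joined = new StringBuilder();
            for (Text v : values) {
                joined.append(v.toString()).append(" | ");
            }
            context.write(key, new Text(joined.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "MultipleInputs example");
        job.setJarByClass(MultipleInputsExample.class);

        // One addInputPath call per input: each path gets its own InputFormat and Mapper,
        // but all mappers must emit the same map-output key/value types.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, FirstMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, SecondMapper.class);

        job.setReducerClass(TagJoinReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples that follow all share this pattern, varying only the input formats (text, sequence files, HBase tables, WARC archives, and so on) and the join or aggregation logic in the reducer.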

Project: mapreduce-samples    File: Multiplication.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJarByClass(Multiplication.class);

    // Note: the ChainMapper and setMapperClass calls below are effectively superseded by the
    // MultipleInputs.addInputPath calls further down, which install their own delegating
    // mapper with one mapper per input path.
    ChainMapper.addMapper(job, CooccurrenceMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf);
    ChainMapper.addMapper(job, RatingMapper.class, Text.class, Text.class, Text.class, Text.class, conf);

    job.setMapperClass(CooccurrenceMapper.class);
    job.setMapperClass(RatingMapper.class);

    job.setReducerClass(MultiplicationReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);

    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    job.waitForCompletion(true);
}
Project: mapreduce-samples    File: UnitMultiplication.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: mapreduce-samples    File: UnitMultiplication.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: mapreduce-samples    File: UnitSum.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);

        ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
        ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);

        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: Data-Science-with-Hadoop    File: ReduceJoin.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Reduce-side join");
    job.setJarByClass(ReduceJoin.class);
    job.setReducerClass(ReduceJoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, SalesRecordMapper.class) ;
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, AccountRecordMapper.class) ;
    //        FileOutputFormat.setOutputPath(job, new Path(args[2]));
    Path outputPath = new Path(args[2]);
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true); // recursively remove any pre-existing output

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: book-merger    File: BookMerger.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "book merger");

    job.setJarByClass(BookMerger.class);
    job.setCombinerClass(BookDataReducer.class);
    job.setReducerClass(BookDataReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BookMapWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(args[0]));
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CanonicalMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[2]), TextInputFormat.class, LibraryThingMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[3]), TextInputFormat.class, LTScrapedMapper.class);

    job.waitForCompletion(true);
}
Project: mara    File: MultiInputAnnotationHandler.java
@Override
public void process(Annotation annotation, Job job, Object target)
        throws ToolException {
    for (Input input : ((MultiInput)annotation).value()) {
        Path path = getInputAsPath(input.path());
        if (input.mapper() == Mapper.class) {
            MultipleInputs.addInputPath(job, path, input.format());
        }
        else {
            MultipleInputs.addInputPath(job, path, input.format(), input.mapper());
            // Need to call again here so the call is captured by our aspect which
            // will replace it with the annotated delegating mapper class for resource
            // injection if required.
            job.setMapperClass(DelegatingMapper.class);
        }
    }
}
Project: mara    File: JobAnnotationUtilTest.java
@Test @Ignore // NOT WORKING
public void testConfigureJobFromClass() {
    Class<?> clazz = TestMapper.class;

    try {
        PowerMockito.mockStatic(MultipleInputs.class);

        // Now configure. Note that verifyStatic() is issued before
        // configureJobFromClass() has run, so there is nothing to verify yet --
        // presumably why this test is @Ignore'd as not working.
        PowerMockito.verifyStatic(Mockito.times(6));
        annotationUtil.configureJobFromClass(clazz, job);

    } catch (ToolException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Project: book-hadoop-hacks    File: TestReduceSideJoin.java
public int run(String[] args) throws Exception {
    Job job=Job.getInstance(getConf(), "reduce side join");
    job.setJarByClass(getClass());

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class,ReduceSideJoinMasterMap.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class,ReduceSideJoinMasterMap.class);

    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(ReducesidejoinPartitioner.class);
    job.setGroupingComparatorClass(ReduceSideJoinGroupingComparator.class);

    job.setReducerClass(ReduceSideJoinReduce.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    return job.waitForCompletion(true)?0:1; 
}
Project: hadoop-fieldformat    File: TestRun.java
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  //conf.set("mapreduce.fieldoutput.header", "ct_audit,ct_action");
  Job job = new Job(conf);
  job.setJobName("test fieldInput");
  job.setJarByClass(TestRun.class);
  MultipleInputs.addInputPath(job, new Path(args[0]), FieldInputFormat.class, CTMapper.class);
  job.setNumReduceTasks(0);
  //FileOutputFormat.setOutputPath(job, new Path(args[1]));
  FieldOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setOutputFormatClass(FieldOutputFormat.class);
  job.submit();
  job.waitForCompletion(true);
  return 0;
}
Project: hadoop-map-reduce-patterns    File: PostCommentHierarchy.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "PostCommentHeirarchy");
    job.setJarByClass(PostCommentHierarchy.class);

    MultipleInputs.addInputPath(job, new Path(args[0]),
            TextInputFormat.class, PostMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
            TextInputFormat.class, CommentMapper.class);

    job.setReducerClass(PostCommentHierarchyReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 2;
}
Project: InsAdjustment    File: InsDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJarByClass(InsDriver.class);
    // job.setMapperClass(PFMapper.class);
    job.setReducerClass(InsReducer.class);
    // job.setNumReduceTasks(0);
    job.setJobName("Participant Adjustment PoC");

    String busDate = args[3];
    job.getConfiguration().set("BUS_DATE", busDate);

    // map-reduce job.
    Path inputPath1 = new Path(args[0]);
    Path inputPath2 = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    MultipleInputs.addInputPath(job, inputPath1, TextInputFormat.class, PFMapper.class);
    MultipleInputs.addInputPath(job, inputPath2, TextInputFormat.class, BRMapper.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    // TODO: Update the output path for the output directory of the
    // map-reduce job.
    // configuration should contain reference to your namenode

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Submit the job and wait for it to finish.
    job.waitForCompletion(true);
}
Project: Bigdata    File: Question3.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: Question3 <in> <out>");
        System.exit(3);
    }

    @SuppressWarnings("deprecation")
    Job job1 = new Job(conf, "averageRating");
    @SuppressWarnings("deprecation")
    Job job2 = new Job(conf,"reduceSideJoin");
    job1.setJarByClass(Question3.class); 
    job1.setMapperClass(TopTenMap.class);
    job1.setReducerClass(TopTenReduce.class);

    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(FloatWritable.class);

    FileInputFormat.addInputPath(job1, new Path(otherArgs[0]));    
    FileOutputFormat.setOutputPath(job1, new Path("/bxr140530/Asgn/temp"));

    if(job1.waitForCompletion(true))
    {
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(Question3.class);
        job2.setMapperClass(TopTenJoinMap.class);
        job2.setReducerClass(TopTenJoinReduce.class);

        MultipleInputs.addInputPath(job2,new Path("/bxr140530/Asgn/temp"),TextInputFormat.class,TopTenJoinMap.class);
        MultipleInputs.addInputPath(job2,new Path(otherArgs[1]),TextInputFormat.class,BusinessMap.class);
        FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));

        job2.waitForCompletion(true);


    }
}
Project: incubator-rya    File: JoinSelectStatsUtil.java
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath, Class<? extends Mapper<CompositeType,TripleCard,?,?>> mapperClass,
    String outPath, String auths) throws AccumuloSecurityException {

  MultipleInputs.addInputPath(job, new Path(prospectsPath), SequenceFileInputFormat.class, mapperClass);
  MultipleInputs.addInputPath(job, new Path(spoPath), SequenceFileInputFormat.class, mapperClass);
  job.setMapOutputKeyClass(CompositeType.class);
  job.setMapOutputValueClass(TripleCard.class);

  SequenceFileOutputFormat.setOutputPath(job, new Path(outPath));
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(TripleEntry.class);
  job.setOutputValueClass(CardList.class);

}
Project: incubator-rya    File: JoinSelectStatisticsTest.java
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    String outpath = conf.get(OUTPUTPATH);

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    MultipleInputs.addInputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()), 
            SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    MultipleInputs.addInputPath(job,new Path(SPOOUT.getAbsolutePath()) , 
            SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

    tempDir = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "");
    SequenceFileOutputFormat.setOutputPath(job, new Path(tempDir.getAbsolutePath()));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TripleEntry.class);
    job.setOutputValueClass(CardList.class);


    job.setSortComparatorClass(JoinSelectSortComparator.class);
    job.setGroupingComparatorClass(JoinSelectGroupComparator.class);
    job.setPartitionerClass(JoinSelectPartitioner.class);
    job.setReducerClass(JoinReducer.class);
    job.setNumReduceTasks(32);
    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;          
}
Project: incubator-rya    File: AbstractReasoningTool.java
/**
 * Set up the MapReduce job to use Accumulo as an input.
 * @param tableMapper Mapper class to use
 */
protected void configureAccumuloInput(Class<? extends Mapper<Key,Value,?,?>> tableMapper)
        throws AccumuloSecurityException {
    MRReasoningUtils.configureAccumuloInput(job);
    MultipleInputs.addInputPath(job, new Path("/tmp/input"),
        AccumuloInputFormat.class, tableMapper);
}
Project: incubator-rya    File: AbstractReasoningTool.java
/**
 * Set up the MapReduce job to use an RDF file as an input.
 * @param rdfMapper class to use
 */
protected void configureRdfInput(Path inputPath,
        Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper) {
    Configuration conf = job.getConfiguration();
    String format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.RDFXML.getName());
    conf.set(MRUtils.FORMAT_PROP, format);
    MultipleInputs.addInputPath(job, inputPath,
        RdfFileInputFormat.class, rdfMapper);
}
Project: hiped2    File: StreamingRepartitionJoin.java
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(Options.USERS));
  Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS));
  Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(StreamingRepartitionJoin.class);

  MultipleInputs.addInputPath(job, usersPath, TextInputFormat.class, UserMap.class);
  MultipleInputs.addInputPath(job, userLogsPath, TextInputFormat.class, UserLogMap.class);

  ShuffleUtils.configBuilder()
      .useNewApi()
      .setSortIndices(KeyFields.USER, KeyFields.DATASET)
      .setPartitionerIndices(KeyFields.USER)
      .setGroupIndices(KeyFields.USER)
      .configure(job.getConfiguration());

  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Tuple.class);
  job.setMapOutputValueClass(Tuple.class);

  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true) ? 0 : 1;
}
Project: hiped2    File: BloomJoin.java
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path usersPath = new Path(cli.getArgValueAsString(Options.USERS));
  Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS));
  Path bloomPath = new Path(cli.getArgValueAsString(Options.BLOOM_FILE));
  Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);

  job.setJarByClass(BloomJoin.class);
  MultipleInputs.addInputPath(job, usersPath, TextInputFormat.class, UserMap.class);
  MultipleInputs.addInputPath(job, userLogsPath, TextInputFormat.class, UserLogMap.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Tuple.class);

  job.setReducerClass(Reduce.class);

  job.addCacheFile(bloomPath.toUri());
  job.getConfiguration().set(AbstractFilterMap.DISTCACHE_FILENAME_CONFIG, bloomPath.getName());

  FileInputFormat.setInputPaths(job, userLogsPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true) ? 0 : 1;
}
Project: incubator-mrql    File: MapOperation.java
/** The cMap physical operator
 * @param map_fnc       mapper function
 * @param acc_fnc       optional accumulator function
 * @param zero          optional the zero value for the accumulator
 * @param source        input data source
 * @param stop_counter  optional counter used in repeat operation
 * @return a new data source that contains the result
 */
public final static DataSet cMap ( Tree map_fnc,         // mapper function
                                   Tree acc_fnc,         // optional accumulator function
                                   Tree zero,            // optional the zero value for the accumulator
                                   DataSet source,       // input data source
                                   String stop_counter ) // optional counter used in repeat operation
                            throws Exception {
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.mapper",map_fnc.toString());
    conf.set("mrql.counter",stop_counter);
    if (zero != null) {
        conf.set("mrql.accumulator",acc_fnc.toString());
        conf.set("mrql.zero",zero.toString());
    } else conf.set("mrql.zero","");
    setupSplits(source,conf);
    Job job = new Job(conf,newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setJarByClass(MapReducePlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    for (DataSource p: source.source)
        MultipleInputs.addInputPath(job,new Path(p.path),(Class<? extends MapReduceMRQLFileInputFormat>)p.inputFormat,cMapMapper.class);
    FileOutputFormat.setOutputPath(job,new Path(newpath));
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0
             : job.getCounters().findCounter("mrql",stop_counter).getValue();
    return new DataSet(new BinaryDataSource(newpath,conf),c,outputRecords(job));
}
Project: hadoop-in-action    File: JoinRecordWithStationName.java
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        JobBuilder
                .printUsage(this, "<ncdc input> <station input> <output>");
        return -1;
    }

    Job job = Job.getInstance(getConf(), "Join weather records with station names");
    job.setJarByClass(getClass());

    Path ncdcInputPath = new Path(args[0]);
    Path stationInputPath = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    MultipleInputs.addInputPath(job, ncdcInputPath, TextInputFormat.class,
            JoinRecordMapper.class);
    MultipleInputs.addInputPath(job, stationInputPath,
            TextInputFormat.class, JoinStationMapper.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(TextPair.FirstComparator.class);

    job.setMapOutputKeyClass(TextPair.class);

    job.setReducerClass(JoinReducer.class);

    job.setOutputKeyClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
Project: shaf    File: PathInputConfig.java
/**
 * Configures the job input for the file-system data source.
 */
@Override
public void configure(
        final Class<? extends DistributedProcess<?, ?, ?, ?, ?, ?>> pcls,
        ProcessConfiguration config, Job job) throws JobConfigException {
    try {
        int index = 0;

        InputTokenizer it = config.getInputTokenizer();
        while (it.nextToken()) {
            LOG.debug("Configuring input number: " + (index++));

            Path path = new Path(config.getBase(), it.getPath());
            if (it.isMapperClassNameDefined()) {
                @SuppressWarnings("unchecked")
                Class<? extends MapProcess<?, ?, ?, ?>> mcls = (Class<? extends MapProcess<?, ?, ?, ?>>) DynamicClassLoader
                        .getClassByName(it.getMapperClassName());
                JobInput anno = mcls.getAnnotation(JobInput.class);

                MultipleInputs.addInputPath(job, path, anno.formatClass(),
                        HadoopMapProcessWrapper.class);
                HadoopMapProcessWrapper.addWrappingProcessClass(job, path,
                        mcls);
                LOG.debug("Adds input format: "
                        + anno.formatClass());
                LOG.debug("Adds mapper: " + mcls);

            }

            FileInputFormat.addInputPath(job, path);
            LOG.debug("Adds input path:   " + path);
        }
    } catch (PropertyNotFoundException | ClassNotFoundException
            | IOException exc) {
        throw new JobConfigException("Failed to configure job input.", exc);
    }
}
Project: incubator-blur    File: IndexerJobDriver.java
private boolean runMrWithLookup(String uuid, TableDescriptor descriptor, List<Path> inprogressPathList, String table,
    Path fileCache, Path outputPath, int reducerMultipler, Path tmpPath, TableStats tableStats, String snapshot)
    throws ClassNotFoundException, IOException, InterruptedException {
  PartitionedInputResult result = buildPartitionedInputData(uuid, tmpPath, descriptor, inprogressPathList, snapshot,
      fileCache);

  Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");

  ExistingDataIndexLookupMapper.setSnapshot(job, MRUPDATE_SNAPSHOT);
  FileInputFormat.addInputPath(job, result._partitionedInputData);
  MultipleInputs.addInputPath(job, result._partitionedInputData, SequenceFileInputFormat.class,
      ExistingDataIndexLookupMapper.class);

  for (Path p : inprogressPathList) {
    FileInputFormat.addInputPath(job, p);
    MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, NewDataMapper.class);
  }

  BlurOutputFormat.setOutputPath(job, outputPath);
  BlurOutputFormat.setupJob(job, descriptor);

  job.setReducerClass(UpdateReducer.class);
  job.setMapOutputKeyClass(IndexKey.class);
  job.setMapOutputValueClass(IndexValue.class);
  job.setPartitionerClass(IndexKeyPartitioner.class);
  job.setGroupingComparatorClass(IndexKeyWritableComparator.class);

  BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);

  boolean success = job.waitForCompletion(true);
  Counters counters = job.getCounters();
  LOG.info("Counters [" + counters + "]");
  return success;
}
Project: incubator-blur    File: IndexerJobDriver.java
private boolean runMrOnly(TableDescriptor descriptor, List<Path> inprogressPathList, String table, Path fileCache,
    Path outputPath, int reducerMultipler) throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
  Path tablePath = new Path(descriptor.getTableUri());
  BlurInputFormat.setLocalCachePath(job, fileCache);
  BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
  MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, ExistingDataMapper.class);

  for (Path p : inprogressPathList) {
    FileInputFormat.addInputPath(job, p);
    MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, NewDataMapper.class);
  }

  BlurOutputFormat.setOutputPath(job, outputPath);
  BlurOutputFormat.setupJob(job, descriptor);

  job.setReducerClass(UpdateReducer.class);
  job.setMapOutputKeyClass(IndexKey.class);
  job.setMapOutputValueClass(IndexValue.class);
  job.setPartitionerClass(IndexKeyPartitioner.class);
  job.setGroupingComparatorClass(IndexKeyWritableComparator.class);

  BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);

  boolean success = job.waitForCompletion(true);
  Counters counters = job.getCounters();
  LOG.info("Counters [" + counters + "]");
  return success;
}
Project: MapReduceSamplesJava    File: PostCommentHierarchy.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: PostCommentHierarchy <posts> <comments> <outdir>");
        System.exit(1);
    }

    Job job = new Job(conf, "PostCommentHierarchy");
    job.setJarByClass(PostCommentHierarchy.class);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]),
            TextInputFormat.class, PostMapper.class);

    MultipleInputs.addInputPath(job, new Path(otherArgs[1]),
            TextInputFormat.class, CommentMapper.class);

    job.setReducerClass(PostCommentHierarchyReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}
Project: hadoop-map-reduce-patterns    File: ReplicatedUserJoin.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 4) {
        printUsage();
    }
    Job job = new Job(conf, "ReduceSideJoin");
    job.setJarByClass(ReplicatedUserJoin.class);

    // Use MultipleInputs to set which input uses what mapper
    // This will keep parsing of each data set separate from a logical
    // standpoint
    // The first two elements of the args array are the two inputs
    MultipleInputs.addInputPath(job, new Path(args[0]),
            TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
            TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", args[2]);

    job.setReducerClass(UserJoinReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[3]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 2;
}
Project: hadoop-map-reduce-patterns    File: ReduceSideJoinBloomFilter.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 4) {
        printUsage();
    }
    Job job = new Job(conf, "ReduceSideJoinBloomFilter");
    job.setJarByClass(ReduceSideJoinBloomFilter.class);

    // Use MultipleInputs to set which input uses what mapper
    // This will keep parsing of each data set separate from a logical
    // standpoint
    // The first two elements of the args array are the two inputs
    MultipleInputs.addInputPath(job, new Path(args[0]),
            TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
            TextInputFormat.class, CommentJoinMapperWithBloom.class);
    job.getConfiguration().set("join.type", args[2]);

    job.setReducerClass(UserJoinReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[3]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 2;
}
Project: hadoop-map-reduce-patterns    File: ReduceSideJoin.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 4) {
        printUsage();
    }
    Job job = new Job(conf, "ReduceSideJoin");
    job.setJarByClass(ReduceSideJoin.class);

    // Use MultipleInputs to set which input uses what mapper
    // This will keep parsing of each data set separate from a logical
    // standpoint
    // The first two elements of the args array are the two inputs
    MultipleInputs.addInputPath(job, new Path(args[0]),
            TextInputFormat.class, UserJoinMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]),
            TextInputFormat.class, CommentJoinMapper.class);
    job.getConfiguration().set("join.type", args[2]);

    job.setReducerClass(UserJoinReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[3]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 2;
}
Project: npc-recommender-imdb    File: FScore.java
public static int run(String[] args, final int REDUCERS, final int T) 
            throws IOException, ClassNotFoundException, InterruptedException{
        //set config
        Configuration c = new Configuration();
        c.set("CPATH", args[2]+"/recommander");
        c.setInt("T", T);

        Job job = new Job(c, "FScore");
        //metrics
        job.setNumReduceTasks(REDUCERS);
        //Classes
        job.setJarByClass(FScore.class);
//      job.setMapperClass(FMapper.class);
        job.setReducerClass(FReducer.class);
        //mapOutput,reduceOutput
//      job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);  
        //IO
//      FileInputFormat.addInputPaths(job, args[1]+"/ratings");
        MultipleInputs.addInputPath(job, new Path(args[1]+"/ratings"), 
                TextInputFormat.class, FMapperRatings.class);
        MultipleInputs.addInputPath(job, new Path(args[2]+"/recommander"), 
                TextInputFormat.class, FMapperResults.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]+"/fscore"));

        return (job.waitForCompletion(true) ? 0 : -1);
    }
Project: hadoop-relational    File: Union.java
@Override
public int run(String[] args) throws Exception {
  System.out.println(Arrays.asList(args).toString());
  // Parse arguments
  Path oneRelationPath = new Path(args[0]),
      twoRelationPath = new Path(args[1]),
      outputRelationPath = new Path(args[2]);

  // Setup job
  Job job = Job.getInstance(conf);
  job.setJarByClass(Union.class);

  MultipleInputs.addInputPath(job, oneRelationPath, TextInputFormat.class, UnionMapper.class);
  MultipleInputs.addInputPath(job, twoRelationPath, TextInputFormat.class, UnionMapper.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Text.class);

  // This is a map-only job
  job.setNumReduceTasks(0);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputRelationPath);

  // Run job
  job.submit();
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: hadoop-relational    File: Join.java
@Override
public int run(String[] args) throws Exception {
  System.out.println(Arrays.asList(args).toString());
  // Parse arguments
  Path leftRelationPath = new Path(args[0]),
      rightRelationPath = new Path(args[1]),
      outputRelationPath = new Path(args[2]);
  conf.set(new JoinMapperLeft().getJoinKeyIndicesKey(), args[3]);
  conf.set(new JoinMapperRight().getJoinKeyIndicesKey(), args[4]);

  // Setup job
  Job job = Job.getInstance(conf);
  job.setJarByClass(Join.class);

  MultipleInputs.addInputPath(job, leftRelationPath, TextInputFormat.class, JoinMapperLeft.class);
  MultipleInputs
      .addInputPath(job, rightRelationPath, TextInputFormat.class, JoinMapperRight.class);

  job.setMapOutputKeyClass(Tuple.class);
  job.setMapOutputValueClass(MarkedTuple.class);

  job.setReducerClass(JoinReducer.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputRelationPath);

  // Run job
  job.submit();
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: TimeseriesDelta    File: RollingAverage.java
public int run(String[] args) throws Exception 
{   
    Job job = Job.getInstance(config);
    job.setJarByClass(RollingAverage.class);
    job.setJobName("RollingAverage");

   if( args.length != 4 ) {
     System.out.printf("Usage: %s <hdfs-input-dir> <hdfs-output-dir> <local-config-file>\n" , args[0]);
     return -1;
   }

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(TimeseriesKey.class);
    job.setMapOutputValueClass(TimeseriesDataPoint.class);

    job.setMapperClass(RollingAverageMapper.class);
    job.setReducerClass(RollingAverageReducer.class);

    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);
   job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);

    Path inputPath = new Path(args[1]);
    MultipleInputs.addInputPath(job, inputPath, TextInputFormat.class, RollingAverageMapper.class);

    TextOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

   loadJobConfig( job, args[3] );  
    return (job.waitForCompletion(true) ? 0 : 1);
}
Project: mapreduce-samples    File: UnitMultiplication.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        // how to chain two mapper classes?
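        // (One answer is the ChainMapper.addMapper approach used in the UnitMultiplication
        // variant earlier on this page; here the MultipleInputs calls below instead assign a
        // separate mapper to each input path.)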

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: Bigdata    File: Top10BusRev.java
public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();       // get all args
        if (otherArgs.length != 3) {
            System.err.println("Usage: Top10BusRev <review> <fbusiness> <ooutput> ");
            System.exit(2);
        }
        // create a job with name "toptenratemov"
        Job job = new Job(conf, "Top10BusRev");
        job.setJarByClass(Top10BusRev.class);



        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

//      uncomment the following line to add the Combiner
//      job.setCombinerClass(Reduce.class);

        // set output key type 
        job.setOutputKeyClass(Text.class);
        // set output value type
        job.setOutputValueClass(FloatWritable.class);

        //set the HDFS path of the input data
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        // set the HDFS path for the output 
        FileOutputFormat.setOutputPath(job, new Path("/datatemp"));

        //Wait till job completion
        if(job.waitForCompletion(true) == true){

            // create a job with name "toptenratemov"
            Job job2 = new Job(conf, "Top10BusRev");

            job2.setJarByClass(Top10BusRev.class);
            job2.setReducerClass(ReduceTop.class);
            MultipleInputs.addInputPath(job2,  new Path("/datatemp"), TextInputFormat.class,
                    MapTopRating.class);
            MultipleInputs.addInputPath(job2, new Path(otherArgs[1]), TextInputFormat.class,
                    MapTopBusiness.class);




//          uncomment the following line to add the Combiner
//          job.setCombinerClass(Reduce.class);

            // set output key type 
            job2.setOutputKeyClass(Text.class);
            // set output value type
            job2.setOutputValueClass(Text.class);

            //set the HDFS path of the input data
            // set the HDFS path for the output 
            FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));
            job2.waitForCompletion(true);

        }
    }
Project: dkpro-c4corpus    File: Phase4RemoveDuplicatesUsingReduceSideJoins.java
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Phase4RemoveDuplicatesUsingReduceSideJoins.class);
    job.setJobName(Phase4RemoveDuplicatesUsingReduceSideJoins.class.getName());

    // paths
    // text files of ids to be deleted
    String textFilePath = args[0];
    // corpus with *.warc.gz
    String commaSeparatedInputFiles = args[1];
    // output
    String outputPath = args[2];

    //second input the look up text file
    MultipleInputs.addInputPath(job, new Path(textFilePath), TextInputFormat.class,
            JoinTextMapper.class);
    //first input the data set (check comma separated availability)
    MultipleInputs.addInputPath(job, new Path(commaSeparatedInputFiles), WARCInputFormat.class,
            JoinWARCMapper.class);

    job.setPartitionerClass(SourceJoiningKeyPartitioner.class);
    job.setGroupingComparatorClass(SourceJoiningGroupingComparator.class);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(WARCWritable.class);

    job.setReducerClass(JoinReducer.class);

    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
Project: HiveQueryMRJoin    File: FinalDriver.java
public int run(String[] args) throws Exception {
    String inputLoc1 = args[0];
    String inputLoc2 = args[1];
    String inputLoc3 = args[2];
    String outputLoc = args[3];
    String outputLoc2 = args[4];
    String finalOutput = args[5];

    // Job Chaning of multiple jobs.
    // Executing Job 1.
    conf1 = getConf();
    conf1.set("mapred.job.queue.name", "sree");

    Job job1 = new Job(conf1,
            "MapReduce: Performing Cross Join for 2 tables ");
    job1.setJarByClass(FinalDriver.class);
    MultipleInputs.addInputPath(job1, new Path(inputLoc1),
            TextInputFormat.class, ConsumerMapper.class);
    MultipleInputs.addInputPath(job1, new Path(inputLoc2),
            TextInputFormat.class, PurchasesMapper.class);
    job1.setReducerClass(IntmdteJoinReducer.class);
    job1.setMapOutputKeyClass(IntWritable.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(Text.class);
    log.info("Input Path to the map-reduce job " + inputLoc1 + " and  "
            + inputLoc2);
    log.info("Ouput Path to the map-reduce job " + outputLoc);
    Path output = new Path(outputLoc);
    FileOutputFormat.setOutputPath(job1, output);
    job1.waitForCompletion(true);

    // Job 2
    conf2 = getConf();
    conf2.set("mapred.job.queue.name", "sree");
    Job job2 = new Job(conf2,
            "MapReduce: Performing Cross Join for Final table");
    job2.setJarByClass(FinalDriver.class);
    MultipleInputs.addInputPath(job2, new Path(outputLoc),
            TextInputFormat.class, JoinResultMapper.class);
    MultipleInputs.addInputPath(job2, new Path(inputLoc3),
            TextInputFormat.class, TransactionMapper.class);
    job2.setReducerClass(IntermediateJoinReducer.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(NullWritable.class);
    job2.setOutputValueClass(Text.class);
    log.info("Input Path to the map-reduce job " + inputLoc1 + " and  "
            + inputLoc2);
    log.info("Ouput Path to the map-reduce job " + outputLoc2);
    Path output1 = new Path(outputLoc2);
    FileSystem.get(conf2).delete(output1, true);
    FileOutputFormat.setOutputPath(job2, output1);
    job2.waitForCompletion(true);

    // Job 3
    conf3 = getConf();
    conf3.set("mapred.job.queue.name", "sree");
    Job job3 = new Job(conf3, "MapReduce : Final Join ");
    job3.setJarByClass(FinalDriver.class);
    FileInputFormat.addInputPath(job3, new Path(outputLoc2));
    job3.setMapperClass(FinalMapper.class);
    job3.setReducerClass(FinalReducer.class);
    job3.setMapOutputKeyClass(Text.class);
    job3.setMapOutputValueClass(Text.class);
    job3.setOutputKeyClass(NullWritable.class);
    job3.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job3, new Path(finalOutput));

    int status = (job3.waitForCompletion(true) == true) ? 0 : 1;
    return status;

}
Project: CBIR-on-Hadoop    File: SeqReader.java
public static void main(String[] args) throws IOException,
        ClassNotFoundException, InterruptedException {

    if (args.length < 3) {
        System.out.println("Usage: <jar file> <sequence filename(s)> "
                + "<desired number of output sequence files> "
                + "<ABSOLUTE path to hdfs locaiton where the output folder "
                + "will automatically be created>");
        System.exit(0);
    }

    int numOutputFiles = Integer.parseInt(args[args.length - 2]);
    if (numOutputFiles < 1) {
        // someone is screwing around
        numOutputFiles = 1;
    }

    String absPath = args[args.length - 1];
    if (absPath.charAt(absPath.length() - 1) != '/') {
        absPath += "/";
    }

    DateFormat dateFormat = new SimpleDateFormat("ddMMyyyyHHmmss");
    Date date = new Date();
    String baseOutputName = absPath + "SeqReader" + dateFormat.format(date);

    Configuration conf = new Configuration();
    conf.set("BASE_OUTPUT_FILE_NAME", baseOutputName);
    conf.set("NUM_REDUCERS", Integer.toString(numOutputFiles));
    Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(SequenceFileToImageMapper.class);
    job.setReducerClass(SequenceFileToImageReducer.class);
    job.setPartitionerClass(SequenceFileToImagePartitioner.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(numOutputFiles);

    LazyOutputFormat.setOutputFormatClass(job,
            SequenceFileOutputFormat.class);

    for (int i = 0; i < args.length - 2; i++) {
        // FileInputFormat.setInputPaths(job, new Path(args[i]));
        MultipleInputs.addInputPath(job, new Path(args[i]),
                SequenceFileInputFormat.class);
    }
    for (int i = 0; i < numOutputFiles; i++) {
        MultipleOutputs.addNamedOutput(job, "n" + Integer.toString(i),
                SequenceFileOutputFormat.class, Text.class, Text.class);
    }
    job.setJarByClass(SeqReader.class);
    FileOutputFormat.setOutputPath(job, new Path(baseOutputName));

    /*  write the output folder location 
     *  to a file in the destination folder
     */
    Path f = new Path(absPath + "SeqReader.outputlocation");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(f)) {
        // File already exists.
        // Delete the file before proceeding.
        fs.delete(f, true);
    }
    FSDataOutputStream os = fs.create(f);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os,
            "UTF-8"));
    br.write(baseOutputName);
    br.close();
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: incubator-mrql    File: MapReduceOperation.java
/**
 * The MapReduce physical operator
 * @param map_fnc          the mapper function
 * @param combine_fnc      optional in-mapper combiner function
 * @param reduce_fnc       the reducer function
 * @param acc_fnc          optional accumulator function
 * @param zero             optional the zero value for the accumulator
 * @param source           the input data source
 * @param num_reduces      number of reducers
 * @param stop_counter     optional counter used in repeat operation
 * @param orderp           does the result need to be ordered?
 * @return a new data source that contains the result
 */
public final static DataSet mapReduce ( Tree map_fnc,         // mapper function
                                        Tree combine_fnc,     // optional in-mapper combiner function
                                        Tree reduce_fnc,      // reducer function
                                        Tree acc_fnc,         // optional accumulator function
                                        Tree zero,            // optional the zero value for the accumulator
                                        DataSet source,       // input data source
                                        int num_reduces,      // number of reducers
                                        String stop_counter,  // optional counter used in repeat operation
                                        boolean orderp )      // does the result need to be ordered?
                            throws Exception {
    conf = MapReduceEvaluator.clear_configuration(conf);
    String newpath = new_path(conf);
    conf.set("mrql.mapper",map_fnc.toString());
    if (combine_fnc != null)
        conf.set("mrql.combiner",combine_fnc.toString());
    conf.set("mrql.reducer",reduce_fnc.toString());
    if (zero != null) {   // will use in-mapper combiner
        conf.set("mrql.accumulator",acc_fnc.toString());
        conf.set("mrql.zero",zero.toString());
    } else conf.set("mrql.zero","");
    conf.set("mrql.counter",stop_counter);
    setupSplits(source,conf);
    Job job = new Job(conf,newpath);
    distribute_compiled_arguments(job.getConfiguration());
    job.setJarByClass(MapReducePlan.class);
    job.setOutputKeyClass(MRContainer.class);
    job.setOutputValueClass(MRContainer.class);
    job.setPartitionerClass(MRContainerPartitioner.class);
    job.setSortComparatorClass(MRContainerKeyComparator.class);
    job.setGroupingComparatorClass(MRContainerKeyComparator.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    for (DataSource p: source.source)
        MultipleInputs.addInputPath(job,new Path(p.path),(Class<? extends MapReduceMRQLFileInputFormat>)p.inputFormat,MRMapper.class);
    FileOutputFormat.setOutputPath(job,new Path(newpath));
    job.setReducerClass(MRReducer.class);
    if (Config.trace && PlanGeneration.streamed_MapReduce_reducer(reduce_fnc))
        System.out.println("Streamed MapReduce reducer");
    if (num_reduces > 0)
        job.setNumReduceTasks(num_reduces);
    job.waitForCompletion(true);
    long c = (stop_counter.equals("-")) ? 0
             : job.getCounters().findCounter("mrql",stop_counter).getValue();
    DataSource s = new BinaryDataSource(newpath,conf);
    s.to_be_merged = orderp;
    return new DataSet(s,c,outputRecords(job));
}
Project: RStore    File: IncreQueryOperator.java
public static void exe(DataCube dc) throws IOException,
        InterruptedException, ClassNotFoundException, ParseException {
    String datacubeName = dc.cuboid.cuboidName;
    String tableName = dc.tableName;
    // Path inputPath1 = new Path(datacubeName);

    Path inputPath1 = dc.cubePath;
    Path inputPath2 = new Path(tableName);
    Path outputPath = new Path(dc.outputPath);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    Configuration config = HBaseConfiguration.create();
    config.set("datacube", dc.serialize());
    Job job = new Job(config, "Incre Querying Operator");
    job.setJarByClass(IncreQueryOperator.class); // class that contains
                                                    // mapper and reducer
    Scan scan = new Scan();
    // Scan scan1 = new Scan(startDate.getTime(), endDate.getTime());
    // Scan scan = new Scan(dc.cubeRefreshTime, dc.queryingTime);

    scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                            // MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs

    TableMapReduceUtil.initTableMapperJob(tableName, // input table
            scan, RealTimeScanMapper.class, // mapper class
            Text.class, // mapper output key
            FloatWritable.class, // mapper output value
            job);

    job.setReducerClass(CubeAggregationReducer.class); // reducer class
    job.setOutputFormatClass(TextOutputFormat.class);

    MultipleInputs.addInputPath(job, inputPath1, TextInputFormat.class,
            CubeScanMapper.class);
    MultipleInputs.addInputPath(job, inputPath2, TableInputFormat.class,
            RealTimeScanMapper.class);

    FileOutputFormat.setOutputPath(job, outputPath);
    job.waitForCompletion(true);
}
Project: mgfsm    File: MaxFsmJob.java
public static void runMaxFsmJob() throws IOException, InterruptedException,
    ClassNotFoundException {

  Type outputType = commonConfig.getType();
  int numberOfReducers = commonConfig.getNumberOfReducers();

  Job mCJob = new Job();
  mCJob.setJarByClass(MaxFsmJob.class);
  mCJob.setJobName("MG-FSM+");

  mCJob.getConfiguration().setEnum("org.apache.mahout.fsm.partitioning.outputType", outputType);

  MultipleInputs.addInputPath(mCJob, 
                              new Path(commonConfig.getTmpPath()),
                              SequenceFileInputFormat.class, 
                              MaxFsmMapper.class);
  MultipleInputs.addInputPath(mCJob, 
                              commonConfig.getFlistPath(), 
                              SequenceFileInputFormat.class,
                              MaxFsmMapper.class);

  FileOutputFormat.setOutputPath(mCJob, new Path(commonConfig.getOutputPath()));

  mCJob.setSortComparatorClass(BytesWritable.Comparator.class);

  mCJob.setOutputFormatClass(SequenceFileOutputFormat.class);

  mCJob.setCombinerClass(MaxFsmCombiner.class);
  mCJob.setReducerClass(MaxFsmReducer.class);

  mCJob.setMapOutputKeyClass(BytesWritable.class);
  mCJob.setMapOutputValueClass(LongWritable.class);

  mCJob.setNumReduceTasks(numberOfReducers);
  mCJob.getConfiguration().set("mapreduce.cluster.reducememory.mb", "4096");

  mCJob.setOutputKeyClass(IntArrayWritable.class);
  mCJob.setOutputValueClass(LongWritable.class);

  mCJob.waitForCompletion(true);

  // waitForCompletion(true) above already blocks until the job finishes,
  // so this polling loop is redundant.
  while (!mCJob.isComplete()) {
  }

}