Java Class org.apache.hadoop.mapreduce.Job Example Source Code

Project: aliyun-maxcompute-data-collectors    File: TestImportJob.java
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
Project: hadoop    File: LoadJob.java
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, ClassNotFoundException,
                              InterruptedException {
        job.setMapperClass(LoadMapper.class);
        job.setReducerClass(LoadReducer.class);
        job.setNumReduceTasks(jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(GridmixRecord.class);
        job.setSortComparatorClass(LoadSortComparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(LoadInputFormat.class);
        job.setOutputFormatClass(RawBytesOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(LoadJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        FileOutputFormat.setOutputPath(job, outdir);
        job.submit();
        return job;
      }
    });

  return job;
}
Project: learn-to-hadoop    File: MaxTemperatureWithCombiner.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
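The MaxTemperatureMapper and MaxTemperatureReducer classes referenced above are not shown. The sketch below is an assumed reducer implementation, included only to illustrate why the same class can double as the combiner: taking a maximum is associative and commutative, so pre-aggregating map outputs before the shuffle does not change the final result.
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Assumed implementation, for illustration only; the project's real class may differ.
public class MaxTemperatureReducer
    extends Reducer<Text, IntWritable, Text, IntWritable> {

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int maxValue = Integer.MIN_VALUE;
    for (IntWritable value : values) {
      maxValue = Math.max(maxValue, value.get());
    }
    context.write(key, new IntWritable(maxValue));
  }
}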
Project: Wikipedia-Index    File: TF_IDF.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("TF-IDFCount");
    job.setJarByClass(TF_IDF.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(TF_IDFMap.class);
    job.setReducerClass(TF_IDFReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: hadoop    File: TestChainErrors.java
/**
 * Tests one of the maps consuming output.
 * 
 * @throws Exception
 */
public void testChainMapNoOuptut() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");

  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Outputs doesn't match", expectedOutput, MapReduceTestUtil
      .readOutput(outDir, conf));
}
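Outside the test harness, the same ChainMapper/ChainReducer API is typically wired as in this hedged sketch. TokenizerMapper and StopWordFilterMapper are assumed mapper classes; the per-stage Configuration objects are optional overrides, and each stage's input types must match the previous stage's output types.
Job job = Job.getInstance(new Configuration(), "chain example");
// First map stage: raw text -> (word, 1). TokenizerMapper is an assumed class.
ChainMapper.addMapper(job, TokenizerMapper.class, LongWritable.class, Text.class,
    Text.class, IntWritable.class, new Configuration(false));
// Second map stage runs in the same task on the first stage's output. Assumed class.
ChainMapper.addMapper(job, StopWordFilterMapper.class, Text.class, IntWritable.class,
    Text.class, IntWritable.class, new Configuration(false));
// A single reducer closes the chain; IntSumReducer ships with Hadoop.
ChainReducer.setReducer(job, IntSumReducer.class, Text.class, IntWritable.class,
    Text.class, IntWritable.class, new Configuration(false));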
Project: ditb    File: IntegrationTestTableMapReduceUtil.java
/**
 * Look for jars we expect to be on the classpath by name.
 */
@Test
public void testAddDependencyJars() throws Exception {
  Job job = new Job();
  TableMapReduceUtil.addDependencyJars(job);
  String tmpjars = job.getConfiguration().get("tmpjars");

  // verify presence of modules
  assertTrue(tmpjars.contains("hbase-common"));
  assertTrue(tmpjars.contains("hbase-protocol"));
  assertTrue(tmpjars.contains("hbase-client"));
  assertTrue(tmpjars.contains("hbase-hadoop-compat"));
  assertTrue(tmpjars.contains("hbase-server"));

  // verify presence of 3rd party dependencies.
  assertTrue(tmpjars.contains("zookeeper"));
  assertTrue(tmpjars.contains("netty"));
  assertTrue(tmpjars.contains("protobuf"));
  assertTrue(tmpjars.contains("guava"));
  assertTrue(tmpjars.contains("htrace"));
}
Project: hadoop    File: MapReduceTestUtil.java
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
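A hedged usage sketch of the helper above, as a test might call it; the directories are placeholders and the surrounding Configuration is assumed to point at a working (mini)cluster.
Configuration conf = new Configuration();
Path outDir = new Path("/tmp/copy-out");   // placeholder output directory
Path in1 = new Path("/tmp/copy-in-1");     // placeholder input directories
Path in2 = new Path("/tmp/copy-in-2");
Job copyJob = MapReduceTestUtil.createCopyJob(conf, outDir, in1, in2);
assertTrue("copy job failed", copyJob.waitForCompletion(true));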
Project: ditb    File: CellCounter.java
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
      "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println("  Note: -D properties will be applied to the conf used. ");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to get fine grained control on what is counted..");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    System.exit(-1);
  }
  Job job = createSubmittableJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: Hadoop-Codes    File: testDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "test");

    job.setMapperClass(testMapper.class);
    job.setPartitionerClass(testPartitioner.class);
    job.setReducerClass(testReducer.class);
    job.setNumReduceTasks(10);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (!job.waitForCompletion(true))
        return;
}
Project: Hadoop-Codes    File: MaxTempDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "maxtemp");

    job.setMapperClass(MaxTempMapper.class);
    job.setReducerClass(MaxTempReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (!job.waitForCompletion(true))
        return;
}
Project: aliyun-maxcompute-data-collectors    File: MergeJob.java
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
Project: ditb    File: SampleUploader.java
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers.  Just write straight to table.  Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
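A hedged sketch of how a driver main might invoke the configuration method above; the input path and table name are placeholders following the argument layout expected by configureJob.
Configuration conf = HBaseConfiguration.create();
String[] uploadArgs = new String[] { "/data/sample-input", "sample_table" };  // placeholders
Job job = configureJob(conf, uploadArgs);
System.exit(job.waitForCompletion(true) ? 0 : 1);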
Project: ditb    File: TestTableInputFormat.java
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
Project: ditb    File: TestHFileOutputFormat2.java
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));

  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

  assertTrue(job.waitForCompletion(true));
}
Project: ditb    File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
      InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: aliyun-maxcompute-data-collectors    File: ExportJobBase.java
@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }

}
Project: hadoop    File: TestCLI.java
@Test
public void testListAttemptIdsWithInvalidInputs() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_JOB_SETUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_SETUP", "running" });

  int retCode_JOB_CLEANUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_CLEANUP", "running" });

  int retCode_invalidTaskState = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "complete" });

  assertEquals("JOB_SETUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_SETUP);
  assertEquals("JOB_CLEANUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_CLEANUP);
  assertEquals("complete is an invalid input,exit code should be -1", -1,
      retCode_invalidTaskState);

}
Project: hadoop    File: TestJobCounters.java
@Test
public void testNewCounterC() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 3);
  createWordsFile(inFiles[3], conf);
  createWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  inputSize += getFileSize(inFiles[4]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
      job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
      job, new Path(OUT_DIR, "outputN2"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 122880, 25600, 102400);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}
Project: hadoop    File: DataDrivenDBInputFormat.java
/** Note that the "orderBy" column is called the "splitBy" in this version.
  * We reuse the same field, but it's not strictly ordering it -- just partitioning
  * the results.
  */
public static void setInput(Job job, 
    Class<? extends DBWritable> inputClass,
    String tableName,String conditions, 
    String splitBy, String... fieldNames) {
  DBInputFormat.setInput(job, inputClass, tableName, conditions, splitBy, fieldNames);
  job.setInputFormatClass(DataDrivenDBInputFormat.class);
}
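A hedged usage sketch of the method above: the JDBC connection settings, table layout, and EmployeeRecord (an assumed DBWritable implementation) are illustrative, not taken from the snippet.
Configuration conf = new Configuration();
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
    "jdbc:mysql://dbhost/payroll", "dbuser", "dbpassword");   // placeholder connection
Job job = Job.getInstance(conf, "employee import");
DataDrivenDBInputFormat.setInput(job, EmployeeRecord.class,
    "employees",            // table name
    null,                   // no WHERE conditions
    "employee_id",          // "splitBy" column used to partition the input
    "employee_id", "name", "salary");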
Project: hadoop    File: ValueAggregatorJob.java
public static JobControl createValueAggregatorJobs(String args[],
  Class<? extends ValueAggregatorDescriptor>[] descriptors) 
throws IOException {

  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<ControlledJob> dependingJobs = new ArrayList<ControlledJob>();
  Configuration conf = new Configuration();
  if (descriptors != null) {
    conf = setAggregatorDescriptors(descriptors);
  }
  Job job = createValueAggregatorJob(conf, args);
  ControlledJob cjob = new ControlledJob(job, dependingJobs);
  theControl.addJob(cjob);
  return theControl;
}
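A hedged usage sketch (not part of ValueAggregatorJob): the returned JobControl does not run anything by itself, so a driver typically runs it on its own thread and polls until all controlled jobs have finished.
public static void runAggregatorJobs(String[] args) throws Exception {
  JobControl control = ValueAggregatorJob.createValueAggregatorJobs(args, null);
  Thread runner = new Thread(control, "ValueAggregatorJobs");
  runner.setDaemon(true);
  runner.start();
  while (!control.allFinished()) {
    Thread.sleep(1000);   // poll once per second
  }
  control.stop();
}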
Project: hadoop    File: SecondarySort.java
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);

  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);

  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
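FirstPartitioner and FirstGroupingComparator are not shown above. The sketch below is an assumed partitioner that captures the idea: route records by the first int of the composite key only, so that all pairs sharing that value land in the same partition and the secondary ordering on the second int happens within one reduce group.
// Assumed sketch; IntPair is taken to expose getFirst() for the first int of the pair.
public static class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
  @Override
  public int getPartition(IntPair key, IntWritable value, int numPartitions) {
    // Spread keys by the first int only; the multiplier just mixes small values.
    return Math.abs(key.getFirst() * 127) % numPartitions;
  }
}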
Project: DocIT    File: Total.java
public static void total(String name, String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    conf.set(QUERIED_NAME, name);
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJarByClass(Total.class);

    // in
    if (!in.endsWith("/"))
        in = in.concat("/");
    in = in.concat("employees");
    SequenceFileInputFormat.addInputPath(job, new Path(in));
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // map
    job.setMapperClass(TotalMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // reduce
    job.setCombinerClass(TotalReducer.class);
    job.setReducerClass(TotalReducer.class);

    // out
    SequenceFileOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.waitForCompletion(true);
}
Project: LDA    File: InitDriver.java
public static void run(Configuration conf, Path[] inputPath, Path outputPath) throws IOException, ClassNotFoundException, InterruptedException {
        String jobName = "init matrix";
        Job job = new Job(conf, jobName);

        job.setMapOutputKeyClass(twoDimensionIndexWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(twoDimensionIndexWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setMapperClass(InitMapper.class);
        job.setReducerClass(InitReducer.class);
        job.setNumReduceTasks(1);

        for(Path path : inputPath) {
            FileInputFormat.addInputPath(job, path);
        }
        Path output = new Path(outputPath, "initDir");
        FileOutputFormat.setOutputPath(job, output);

        job.setJarByClass(LDADriver.class);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Init failed");
        }
}
Project: ditb    File: WALPlayer.java
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    usage("Wrong number of arguments: " + otherArgs.length);
    System.exit(-1);
  }
  Job job = createSubmittableJob(otherArgs);
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: ditb    File: TestTableInputFormatScanBase.java
/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(
    Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}
Project: ditb    File: TableMapReduceUtil.java
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The table name to read from.
 * @param scan  The scan instance with the columns, time range etc.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(TableName table,
    Scan scan,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass,
    Job job) throws IOException {
  initTableMapperJob(table.getNameAsString(),
      scan,
      mapper,
      outputKeyClass,
      outputValueClass,
      job,
      true);
}
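A hedged usage sketch of the overload above: a map-only scan job over a hypothetical HBase table "access_logs". RowCountMapper is an assumed TableMapper<Text, LongWritable>; the output path is a placeholder.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "access_logs scan");
job.setJarByClass(RowCountMapper.class);
Scan scan = new Scan();
scan.setCaching(500);        // fewer RPC round trips on full scans
scan.setCacheBlocks(false);  // don't pollute the block cache from an MR scan
TableMapReduceUtil.initTableMapperJob(TableName.valueOf("access_logs"), scan,
    RowCountMapper.class, Text.class, LongWritable.class, job);
job.setNumReduceTasks(0);    // map-only; mappers write their output directly
FileOutputFormat.setOutputPath(job, new Path("/tmp/access_logs-out"));  // placeholder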
Project: big_data    File: ActiveUserRunner.java
/**
 * Initializes the list of scans.
 * 
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
    Configuration conf = job.getConfiguration();
    // Get the run date: yyyy-MM-dd
    String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
    long startDate = TimeUtil.parseString2Long(date);
    long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

    Scan scan = new Scan();
    // Define the start and stop rowkeys for the HBase scan
    scan.setStartRow(Bytes.toBytes("" + startDate));
    scan.setStopRow(Bytes.toBytes("" + endDate));

    FilterList filterList = new FilterList();
    // Columns the mapper needs to read
    String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
            EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
            EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
            EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
            EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
    };
    filterList.addFilter(this.getColumnFilter(columns));

    scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
    scan.setFilter(filterList);
    return Lists.newArrayList(scan);
}
Project: hadoop    File: TestBinaryTokenFile.java
private void setupBinaryTokenFile(Job job) {
  // Credentials in the job will not have delegation tokens
  // because security is disabled. Fetch delegation tokens
  // and store in binary token file.
  createBinaryTokenFile(job.getConfiguration());
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY,
      binaryTokenFileName.toString());
  // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY
  // key now gets deleted from config,
  // so it's not accessible in the job's config. So,
  // we use another key to pass the file name into the job configuration:
  job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME,
      binaryTokenFileName.toString());
}
Project: hadoop    File: TestCLI.java
@Test
public void testListAttemptIdsWithValidInput() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());

  doReturn(mockCluster).when(cli).createCluster();
  when(job.getTaskReports(TaskType.MAP)).thenReturn(
      getTaskReports(jobId, TaskType.MAP));
  when(job.getTaskReports(TaskType.REDUCE)).thenReturn(
      getTaskReports(jobId, TaskType.REDUCE));
  when(mockCluster.getJob(jobId)).thenReturn(job);

  int retCode_MAP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "MAP", "running" });
  // testing case insensitive behavior
  int retCode_map = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "map", "running" });

  int retCode_REDUCE = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "running" });

  int retCode_completed = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "completed" });

  assertEquals("MAP is a valid input,exit code should be 0", 0, retCode_MAP);
  assertEquals("map is a valid input,exit code should be 0", 0, retCode_map);
  assertEquals("REDUCE is a valid input,exit code should be 0", 0,
      retCode_REDUCE);
  assertEquals(
      "REDUCE and completed are a valid inputs to -list-attempt-ids,exit code should be 0",
      0, retCode_completed);

  verify(job, times(2)).getTaskReports(TaskType.MAP);
  verify(job, times(2)).getTaskReports(TaskType.REDUCE);
}
Project: hadoop    File: MultiFileWordCount.java
public int run(String[] args) throws Exception {

    if(args.length < 2) {
      printUsage();
      return 2;
    }

    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    //set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    //use the defined mapper
    job.setMapperClass(MapClass.class);
    //use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
  }
Project: ditb    File: TestHFileOutputFormat2.java
/**
 * Run small MR job.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);
}
Project: hadoop    File: DistCp.java
/**
 * Create and submit the mapreduce job.
 * @return The mapreduce job object that has been submitted
 */
public Job createAndSubmitJob() throws Exception {
  assert inputOptions != null;
  assert getConf() != null;
  Job job = null;
  try {
    synchronized(this) {
      //Don't cleanup while we are setting up.
      metaFolder = createMetaFolderPath();
      jobFS = metaFolder.getFileSystem(getConf());
      job = createJob();
    }
    if (inputOptions.shouldUseDiff()) {
      if (!DistCpSync.sync(inputOptions, getConf())) {
        inputOptions.disableUsingDiff();
      }
    }
    createInputFileListing(job);

    job.submit();
    submitted = true;
  } finally {
    if (!submitted) {
      cleanup();
    }
  }

  String jobID = job.getJobID().toString();
  job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
  LOG.info("DistCp job-id: " + jobID);

  return job;
}
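createAndSubmitJob only submits the copy; a caller that needs a blocking copy can wait on the returned handle, as in this hedged fragment (distCp stands for an already-configured DistCp instance, and the enclosing method is assumed to allow IOException).
Job job = distCp.createAndSubmitJob();   // assumed pre-configured DistCp instance
if (!job.waitForCompletion(true)) {      // monitors and logs progress until the job ends
  throw new IOException("DistCp job " + job.getJobID() + " failed");
}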
Project: hadoop    File: FailJob.java
public int run(String[] args) throws Exception {
  if(args.length < 1) {
    System.err.println("FailJob " +
        " (-failMappers|-failReducers)");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }
  boolean failMappers = false, failReducers = false;

  for (int i = 0; i < args.length; i++ ) {
    if (args[i].equals("-failMappers")) {
      failMappers = true;
    }
    else if(args[i].equals("-failReducers")) {
      failReducers = true;
    }
  }
  if (!(failMappers ^ failReducers)) {
    System.err.println("Exactly one of -failMappers or -failReducers must be specified.");
    return 3;
  }

  // Write a file with one line per mapper.
  final FileSystem fs = FileSystem.get(getConf());
  Path inputDir = new Path(FailJob.class.getSimpleName() + "_in");
  fs.mkdirs(inputDir);
  for (int i = 0; i < getConf().getInt("mapred.map.tasks", 1); ++i) {
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
        fs.create(new Path(inputDir, Integer.toString(i)))));
    w.write(Integer.toString(i) + "\n");
    w.close();
  }

  Job job = createJob(failMappers, failReducers, inputDir);
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: ditb    File: TableMapReduceUtil.java
/**
 * Use this before submitting a Multi TableMap job. It will appropriately set
 * up the job.
 *
 * @param scans The list of {@link Scan} objects to read from.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is carrying
 *          all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the
 *          configured job classes via the distributed cache (tmpjars).
 * @param initCredentials whether to initialize hbase auth credentials for the job
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars,
    boolean initCredentials) throws IOException {
  job.setInputFormatClass(MultiTableInputFormat.class);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  List<String> scanStrings = new ArrayList<String>();

  for (Scan scan : scans) {
    scanStrings.add(convertScanToString(scan));
  }
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS,
    scanStrings.toArray(new String[scanStrings.size()]));

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  if (initCredentials) {
    initCredentials(job);
  }
}
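A hedged usage sketch of the multi-scan overload above; the table names and MultiTableMapper (an assumed TableMapper subclass emitting Text/IntWritable) are illustrative. Each Scan carries its table name in the SCAN_ATTRIBUTES_TABLE_NAME attribute so MultiTableInputFormat knows which table it targets.
List<Scan> scans = new ArrayList<Scan>();
for (String table : new String[] { "events_2016", "events_2017" }) {  // placeholder tables
  Scan scan = new Scan();
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(table));
  scans.add(scan);
}
Job job = Job.getInstance(HBaseConfiguration.create(), "multi table scan");
TableMapReduceUtil.initTableMapperJob(scans, MultiTableMapper.class,
    Text.class, IntWritable.class, job,
    true,    // addDependencyJars
    true);   // initCredentials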
Project: ditb    File: IntegrationTestLoadAndVerify.java
@Override
public Job createSubmittableJob(String[] args) throws IOException {
  Job job = super.createSubmittableJob(args);
  // Call my class instead.
  job.setJarByClass(WALMapperSearcher.class);
  job.setMapperClass(WALMapperSearcher.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}
Project: mapreduce-samples    File: UnitSum.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);
        job.setMapperClass(PassMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
Project: ditb    File: TestTableMapReduceUtil.java
@Test
public void testInitTableMapperJob1() throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "tableName");
  // test 
  TableMapReduceUtil.initTableMapperJob("Table", new Scan(), Import.Importer.class, Text.class,
      Text.class, job, false, WALInputFormat.class);
  assertEquals(WALInputFormat.class, job.getInputFormatClass());
  assertEquals(Import.Importer.class, job.getMapperClass());
  assertEquals(LongWritable.class, job.getOutputKeyClass());
  assertEquals(Text.class, job.getOutputValueClass());
  assertNull(job.getCombinerClass());
  assertEquals("Table", job.getConfiguration().get(TableInputFormat.INPUT_TABLE));
}
Project: aliyun-maxcompute-data-collectors    File: HdfsOdpsImportJob.java
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
Project: ditb    File: MapreduceTestingShim.java
@Override
public Job newJob(Configuration conf) throws IOException {
  // Implementing:
  // return new Job(conf);
  Constructor<Job> c;
  try {
    c = Job.class.getConstructor(Configuration.class);
    return c.newInstance(conf);
  } catch (Exception e) {
    throw new IllegalStateException(
        "Failed to instantiate new Job(conf)", e);
  }
}
Project: mumu-mapreduce    File: MaxTemperatureMapReduce.java
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }

    String temperatureInput = args[0];
    String temperatureOutput = args[1];
    try {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(MaxTemperatureMapReduce.class);
        job.setJobName("MaxTemperature");
        FileInputFormat.addInputPath(job, new Path(temperatureInput));
        FileOutputFormat.setOutputPath(job, new Path(temperatureOutput));

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }
}