Java class org.apache.hadoop.mapreduce.lib.input.FileInputFormat: example source code
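
The snippets below all follow the same driver pattern: configure a Job, point FileInputFormat at one or more input paths, and point FileOutputFormat at an output path. As a minimal sketch of that pattern (an identity mapper/reducer job; the class name MinimalDriver and the argument handling are placeholders, not taken from any of the projects below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MinimalDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "minimal FileInputFormat example");
    job.setJarByClass(MinimalDriver.class);

    // Identity mapper and reducer: with TextInputFormat the keys are byte
    // offsets (LongWritable) and the values are the input lines (Text).
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // addInputPath appends to the list of input paths; setInputPaths replaces it.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}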

Project: aliyun-maxcompute-data-collectors    File: TestImportJob.java
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
Project: LDA    File: CalParamDriver.java
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "calculating parameter";
    conf.set("params",String.valueOf(params));

    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(CalParamsMapper.class);
    job.setReducerClass(CalParamsReducer.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job,output);

    job.setJarByClass(LDADriver.class);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("calculating parameter failed");
    }
}
Project: DocIT    File: Cut.java
public static void cut(String name, String in) throws Exception {
    Job job = createJob(name, in);
    job.setNumReduceTasks(0);
    boolean success = job.waitForCompletion(true);

    if (success) {
        // MapReduce reads an input and writes the result to a new location.
        // Hence it cannot modify data in place, and we have to replace the
        // input with the output.
        Path output = FileOutputFormat.getOutputPath(job);
        FileSystem fs = output.getFileSystem(job.getConfiguration());
        Path[] inputs = FileInputFormat.getInputPaths(job);
        for (int i = 0; i < inputs.length; i++) {
            fs.delete(inputs[i], true);
        }
        fs.rename(output, inputs[0]);
    }
}
Project: Wikipedia-Index    File: TF.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<page>");
    conf.set("xmlinput.end", "</page>");

    Job job = Job.getInstance(conf);
    job.setJobName("TermFrequencyCount");
    job.setJarByClass(TF.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntArrayWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(TFMap.class);
    job.setReducerClass(TFReduce.class);

    job.setInputFormatClass(XmlInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index    File: ExtractPage.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<page>");
    conf.set("xmlinput.end", "</page>");

    Job job = Job.getInstance(conf);
    job.setJobName("ExtractPages");
    job.setJarByClass(ExtractPage.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(ExtractPageMap.class);
    job.setReducerClass(ExtractPageReduce.class);

    job.setInputFormatClass(XmlInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index    File: DF.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("DocumentFrequencyCount");
    job.setJarByClass(DF.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(DFMap.class);
    job.setReducerClass(DFReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index    File: MaxThreeLabel.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("MaxThreeLabel");
    job.setJarByClass(MaxThreeLabel.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MaxThreeLabelMap.class);
    job.setReducerClass(MaxThreeLabelReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index    File: PageWordCount.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<page>");
    conf.set("xmlinput.end", "</page>");

    Job job = Job.getInstance(conf);
    job.setJobName("PageWordCount");
    job.setJarByClass(PageWordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(PageWordCountMap.class);
    job.setCombinerClass(PageWordCountReduce.class);
    job.setReducerClass(PageWordCountReduce.class);

    job.setInputFormatClass(XmlInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
Project: Wikipedia-Index    File: TF_IDF.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("TF-IDFCount");
    job.setJarByClass(TF_IDF.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(TF_IDFMap.class);
    job.setReducerClass(TF_IDFReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: HBase-High-Performance-Cookbook    File: InputDriver.java
public static void runJob(Path input, Path output, String vectorClassName,Configuration config)
  throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = config;
  conf.set("vector.implementation.class.name", vectorClassName);
  Job job = new Job(conf, "Input Driver running over input: " + input);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(VectorWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(InputMapper.class);   
  job.setNumReduceTasks(0);
  job.setJarByClass(InputDriver.class);

  FileInputFormat.addInputPath(job, input);
  FileOutputFormat.setOutputPath(job, output);

  job.waitForCompletion(true);
}
Project: Hadoop-Codes    File: MaximumAverageDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "maxaverage");

    job.setMapperClass(MaximumAverageMapper.class);
    job.setReducerClass(MaximumAverageReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (!job.waitForCompletion(true))
        return;
}
Project: Hadoop-Codes    File: testDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "test");

    job.setMapperClass(testMapper.class);
    job.setPartitionerClass(testPartitioner.class);
    job.setReducerClass(testReducer.class);
    job.setNumReduceTasks(10);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (!job.waitForCompletion(true))
        return;
}
Project: Hadoop-Codes    File: SystemUserDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "systemuser");

    //job.setMapperClass(SystemUserMapper.class);
    job.setMapperClass(DailyCount.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (!job.waitForCompletion(true))
        return;
}
Project: Hadoop-Codes    File: MaxTempDriver.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "maxtemp");

    job.setMapperClass(MaxTempMapper.class);
    job.setReducerClass(MaxTempReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (!job.waitForCompletion(true))
        return;
}
Project: aliyun-maxcompute-data-collectors    File: ExportJobBase.java
@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }

}
Project: hadoop    File: CredentialsTestJob.java
public Job createJob() 
throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setNumReduceTasks(1);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Project: hadoop    File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Project: hadoop    File: MapReduceTestUtil.java
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
Project: hadoop    File: MapReduceTestUtil.java
/**
 * Creates a simple fail job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
Project: hadoop    File: MapReduceTestUtil.java
public static Job createJob(Configuration conf, Path inDir, Path outDir, 
    int numInputFiles, int numReds, String input) throws IOException {
  Job job = Job.getInstance(conf);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  for (int i = 0; i < numInputFiles; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }    

  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setNumReduceTasks(numReds);
  return job;
}
Project: hadoop    File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
Project: hadoop    File: SleepJob.java
public Job createJob(int numMapper, int numReducer, 
                     long mapSleepTime, int mapSleepCount, 
                     long reduceSleepTime, int reduceSleepCount) 
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Project: hadoop    File: TestLocalRunner.java
/**
 * Run a test with a misconfigured number of mappers.
 * Expect failure.
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
  Job job = Job.getInstance();

  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  LocalJobRunner.setLocalMaxRunningMaps(job, -6);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertFalse("Job succeeded somehow", success);
}
Project: hadoop    File: LocatedFileStatusFetcher.java
/**
 * @param conf configuration for the job
 * @param dirs the initial list of paths
 * @param recursive whether to traverse the paths recursively
 * @param inputFilter inputFilter to apply to the resulting paths
 * @param newApi whether using the mapred or mapreduce API
 * @throws InterruptedException
 * @throws IOException
 */
public LocatedFileStatusFetcher(Configuration conf, Path[] dirs,
    boolean recursive, PathFilter inputFilter, boolean newApi) throws InterruptedException,
    IOException {
  int numThreads = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS,
      FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
  rawExec = Executors.newFixedThreadPool(
      numThreads,
      new ThreadFactoryBuilder().setDaemon(true)
          .setNameFormat("GetFileInfo #%d").build());
  exec = MoreExecutors.listeningDecorator(rawExec);
  resultQueue = new LinkedBlockingQueue<List<FileStatus>>();
  this.conf = conf;
  this.inputDirs = dirs;
  this.recursive = recursive;
  this.inputFilter = inputFilter;
  this.newApi = newApi;
}
Project: hadoop    File: ControlledJob.java
/**
 * Submit this job to mapred. The state becomes RUNNING if submission 
 * is successful, FAILED otherwise.  
 */
protected synchronized void submit() {
  try {
    Configuration conf = job.getConfiguration();
    if (conf.getBoolean(CREATE_DIR, false)) {
      FileSystem fs = FileSystem.get(conf);
      Path inputPaths[] = FileInputFormat.getInputPaths(job);
      for (int i = 0; i < inputPaths.length; i++) {
        if (!fs.exists(inputPaths[i])) {
          try {
            fs.mkdirs(inputPaths[i]);
          } catch (IOException e) {

          }
        }
      }
    }
    job.submit();
    this.state = State.RUNNING;
  } catch (Exception ioe) {
    LOG.info(getJobName()+" got an error while submitting ",ioe);
    this.state = State.FAILED;
    this.message = StringUtils.stringifyException(ioe);
  }
}
Project: hadoop    File: TestFileInputFormat.java
@Test
public void testListLocatedStatus() throws Exception {
  Configuration conf = getConfiguration();
  conf.setBoolean("fs.test.impl.disable.cache", false);
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  MockFileSystem mockFs =
      (MockFileSystem) new Path("test:///").getFileSystem(conf);
  Assert.assertEquals("listLocatedStatus already called",
      0, mockFs.numListLocatedStatusCalls);
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  InputSplit[] splits = fileInputFormat.getSplits(job, 1);
  Assert.assertEquals("Input splits are not correct", 2, splits.length);
  Assert.assertEquals("listLocatedStatuss calls",
      1, mockFs.numListLocatedStatusCalls);
  FileSystem.closeAll();
}
Project: hadoop    File: TestFileInputFormat.java
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);
  String[] locations = splits[0].getLocations();
  Assert.assertEquals(2, locations.length);
  SplitLocationInfo[] locationInfo = splits[0].getLocationInfo();
  Assert.assertEquals(2, locationInfo.length);
  SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
      locationInfo[0] : locationInfo[1];
  SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
      locationInfo[0] : locationInfo[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
Project: hadoop    File: TestFileInputFormat.java
@Test
public void testListStatusSimple() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestSimple(conf, localFs);

  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
Project: hadoop    File: TestFileInputFormat.java
@Test
public void testListStatusNestedRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestNestedRecursive(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
Project: hadoop    File: TestFileInputFormat.java
@Test
public void testListStatusNestedNonRecursive() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestNestedNonRecursive(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  FileStatus[] statuses = fif.listStatus(jobConf);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
          localFs);
}
Project: hadoop    File: TestFileInputFormat.java
@Test
public void testListStatusErrorOnNonExistantDir() throws IOException {
  Configuration conf = new Configuration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
      .configureTestErrorOnNonExistantDir(conf, localFs);
  JobConf jobConf = new JobConf(conf);
  TextInputFormat fif = new TextInputFormat();
  fif.configure(jobConf);
  try {
    fif.listStatus(jobConf);
    Assert.fail("Expecting an IOException for a missing Input path");
  } catch (IOException e) {
    Path expectedExceptionPath = new Path(TEST_ROOT_DIR, "input2");
    expectedExceptionPath = localFs.makeQualified(expectedExceptionPath);
    Assert.assertTrue(e instanceof InvalidInputException);
    Assert.assertEquals(
        "Input path does not exist: " + expectedExceptionPath.toString(),
        e.getMessage());
  }
}
Project: hadoop    File: WordStandardDeviation.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordstddev <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word stddev");
  job.setJarByClass(WordStandardDeviation.class);
  job.setMapperClass(WordStandardDeviationMapper.class);
  job.setCombinerClass(WordStandardDeviationReducer.class);
  job.setReducerClass(WordStandardDeviationReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);

  // read output and calculate standard deviation
  stddev = readAndCalcStdDev(outputpath, conf);

  return (result ? 0 : 1);
}
Project: hadoop    File: WordCount.java
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job,
    new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: hadoop    File: WordMean.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);
  boolean result = job.waitForCompletion(true);
  mean = readAndCalcMean(outputpath, conf);

  return (result ? 0 : 1);
}
Project: hadoop    File: TeraValidate.java
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 1;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: hadoop    File: CompressionEmulationUtil.java
/**
 * Configure the {@link Job} for enabling compression emulation.
 */
static void configure(final Job job) throws IOException, InterruptedException,
                                            ClassNotFoundException {
  // set the random text mapper
  job.setMapperClass(RandomTextDataMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(GenDataFormat.class);
  job.setJarByClass(GenerateData.class);

  // set the output compression true
  FileOutputFormat.setCompressOutput(job, true);
  try {
    FileInputFormat.addInputPath(job, new Path("ignored"));
  } catch (IOException e) {
    LOG.error("Error while adding input path ", e);
  }
}
Project: hadoop    File: GenerateDistCacheData.java
@Override
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs( new PrivilegedExceptionAction <Job>() {
     public Job run() throws IOException, ClassNotFoundException,
                             InterruptedException {
      job.setMapperClass(GenDCDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDCDataFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setJarByClass(GenerateDistCacheData.class);
      try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
      job.submit();
      return job;
    }
  });
  return job;
}
Project: learn-to-hadoop    File: MaxTemperature.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("Max temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: learn-to-hadoop    File: MaxTemperatureWithCombiner.java
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: LDA    File: InputDriver.java
public static void runJob(Configuration conf, Path inputPath, Path output) throws IOException, ClassNotFoundException, InterruptedException {

        Job job = new Job(conf, "Input Drive running input:"+inputPath);
        log.info("start running InputDriver");
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(indexToWordWritable.class);
        job.setOutputKeyClass(twoDimensionIndexWritable.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(InputMapper.class);
        job.setReducerClass(InputReducer.class);
        job.setNumReduceTasks(1);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(InputDriver.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, output);

        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            throw new IllegalStateException("Job failed!");
        }

    }