Example source code for the Java class org.apache.hadoop.mapreduce.lib.input.TextInputFormat
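TextInputFormat is the default InputFormat for MapReduce jobs: it splits input files into lines and presents each record to the mapper as a (LongWritable, Text) pair, where the key is the byte offset of the line within the file and the value is the line itself without its terminator. The snippets below show how a range of projects wire it into their job drivers. As a baseline, a mapper that consumes TextInputFormat records has the shape of the following sketch (the class and its logic are illustrative only, not taken from any project on this page):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LineLengthMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final IntWritable length = new IntWritable();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // TextInputFormat supplies the byte offset as the key and the
        // line contents as the value; emit (line, line length in bytes).
        length.set(line.getLength());
        context.write(line, length);
    }
}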

Project: Wikipedia-Index    File: DF.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("DocumentFrequencyCount");
    job.setJarByClass(DF.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(DFMap.class);
    job.setReducerClass(DFReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
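Because the job declares TextInputFormat, DFMap (whose source is not shown on this page) presumably extends Mapper<LongWritable, Text, Text, IntWritable>: TextInputFormat fixes the input pair to (LongWritable, Text), and the map output classes set above fix the output pair to (Text, IntWritable).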
Project: Wikipedia-Index    File: MaxThreeLabel.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("MaxThreeLabel");
    job.setJarByClass(MaxThreeLabel.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MaxThreeLabelMap.class);
    job.setReducerClass(MaxThreeLabelReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index    File: TF_IDF.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJobName("TF-IDFCount");
    job.setJarByClass(TF_IDF.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(TF_IDFMap.class);
    job.setReducerClass(TF_IDFReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    boolean wait = job.waitForCompletion(true);
    System.exit(wait ? 0 : 1);
}
Project: hadoop    File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Project: hadoop    File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
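LazyOutputFormat wraps the real output format so that a task's output file is only created when the first record is actually written; with plain TextOutputFormat every task creates its part file up front, even if it stays empty. That difference is exactly what this test exercises.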
Project: hadoop    File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check"); 


  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: hadoop    File: FieldSelectionMapper.java
public void setup(Context context) 
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.fieldSeparator = 
    conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
  this.mapOutputKeyValueSpec = 
    conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
  try {
    this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
      context.getInputFormatClass().getCanonicalName());
  } catch (ClassNotFoundException e) {
    throw new IOException("Input format class not found", e);
  }
  allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
    mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
  LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
    mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
    mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
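The try block checks whether the job's input format is TextInputFormat because its keys are mere byte offsets and carry no field data; in that case the mapper sets ignoreInputKey and parses fields from the value only. As an illustrative configuration (not taken from any project here), a key/value spec of "0:1-" with the default tab separator would make field 0 of each line the map output key and all remaining fields the value.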
Project: mapreduce-samples    File: Multiplication.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);
    job.setJarByClass(Multiplication.class);

    // Each mapper is bound to its own input path via MultipleInputs below,
    // which installs a DelegatingMapper; no job-wide mapper is configured here.

    job.setReducerClass(MultiplicationReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);

    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    job.waitForCompletion(true);
}
Project: mapreduce-samples    File: Sum.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setMapperClass(SumMapper.class);
        job.setReducerClass(SumReducer.class);

        job.setJarByClass(Sum.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        TextInputFormat.setInputPaths(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
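TextInputFormat.setInputPaths and TextOutputFormat.setOutputPath, as used here, are static helpers inherited from FileInputFormat and FileOutputFormat; calling them through the subclass is equivalent to the FileInputFormat.addInputPath / FileOutputFormat.setOutputPath style seen in other snippets on this page.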
Project: mapreduce-samples    File: DataDividerByUser.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setMapperClass(DataDividerMapper.class);
        job.setReducerClass(DataDividerReducer.class);

        job.setJarByClass(DataDividerByUser.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        TextInputFormat.setInputPaths(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
Project: mapreduce-samples    File: Normalize.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setMapperClass(NormalizeMapper.class);
        job.setReducerClass(NormalizeReducer.class);

        job.setJarByClass(Normalize.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        TextInputFormat.setInputPaths(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
Project: mapreduce-samples    File: UnitMultiplication.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        // TransitionMapper and PRMapper are bound to their input paths via
        // MultipleInputs below; no job-wide mapper chain is configured here.

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: mapreduce-samples    File: UnitMultiplication.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        // TransitionMapper and PRMapper are bound to their input paths via
        // MultipleInputs below; no job-wide mapper chain is configured here.

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: mapreduce-samples    File: UnitSum.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);

        ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
        ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);

        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
Project: big-data-benchmark    File: HadoopWordCount.java
public static void main(String[] args) throws Exception {
    BasicConfigurator.configure();
    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long t = System.currentTimeMillis();
    job.waitForCompletion(true);

    System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
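Appending System.currentTimeMillis() to the output directory sidesteps FileOutputFormat's check that the output path must not already exist, so repeated benchmark runs don't fail on a stale directory.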
Project: aliyun-oss-hadoop-fs    File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Project: aliyun-oss-hadoop-fs    File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
Project: aliyun-oss-hadoop-fs    File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check"); 


  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: aliyun-oss-hadoop-fs    File: FieldSelectionMapper.java
public void setup(Context context) 
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.fieldSeparator = 
    conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
  this.mapOutputKeyValueSpec = 
    conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
  try {
    this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
      context.getInputFormatClass().getCanonicalName());
  } catch (ClassNotFoundException e) {
    throw new IOException("Input format class not found", e);
  }
  allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
    mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
  LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
    mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
    mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
Project: mutation-server    File: SortJob.java
@Override
public void setupJob(Job job) {

    job.setJarByClass(SortJob.class);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(SortMap.class);
    job.setReducerClass(SortReducer.class);

    job.setMapOutputKeyClass(ReadKey.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setSortComparatorClass(ReadKeyComparator.class);
    job.setPartitionerClass(ReadKeyPartitioner.class);
    job.setGroupingComparatorClass(ReadKeyGroupingComparator.class);

}
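The last three calls are the standard secondary-sort wiring: ReadKeyPartitioner decides which reducer a ReadKey goes to, ReadKeyComparator orders keys within a partition, and ReadKeyGroupingComparator controls which keys are presented to a single reduce() call as one group.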
Project: big-c    File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Project: big-c    File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
Project: big-c    File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check"); 


  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: big-c    File: FieldSelectionMapper.java
public void setup(Context context) 
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.fieldSeparator = 
    conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
  this.mapOutputKeyValueSpec = 
    conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
  try {
    this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
      context.getInputFormatClass().getCanonicalName());
  } catch (ClassNotFoundException e) {
    throw new IOException("Input format class not found", e);
  }
  allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
    mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
  LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
    mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
    mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
Project: Bigdata    File: Question4.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Usage: Question4 <cacheFile> <in> <out>");
            System.exit(3);
        }
        conf.set("cachefile", otherArgs[0]);
        Job job = Job.getInstance(conf, "Question4");
        // Register the cache file on the job itself so the URI survives the
        // configuration copy that Job.getInstance makes.
        job.addCacheFile(new URI(otherArgs[0]));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class); 
        job.setJarByClass(Question4.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
        job.waitForCompletion(true);
    }
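On the task side, the cached file can be opened in the mapper's setup method. A minimal, method-level sketch (hypothetical; the project's actual Map class is not shown on this page, and imports are omitted like in the other snippets here):

@Override
protected void setup(Context context)
        throws IOException, InterruptedException {
    URI[] cacheFiles = context.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        Path cachePath = new Path(cacheFiles[0]);
        FileSystem fs = cachePath.getFileSystem(context.getConfiguration());
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(cachePath)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // populate an in-memory lookup structure here
            }
        }
    }
}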
Project: hadoop-knn    File: KNN.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf, "KNN");
    job.setJarByClass(KNN.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(KnnMapper.class);
    job.setReducerClass(KnnReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
Project: Data-Science-with-Hadoop    File: ReduceJoin.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Reduce-side join");
    job.setJarByClass(ReduceJoin.class);
    job.setReducerClass(ReduceJoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, SalesRecordMapper.class) ;
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, AccountRecordMapper.class) ;
    Path outputPath = new Path(args[2]);
    // Clear any previous output so the job can write fresh results
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
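In a reduce-side join both mappers emit records keyed by the join key, each tagged with its source so ReduceJoinReducer can tell sales and account records apart when merging them; an illustrative (hypothetical) emit from SalesRecordMapper would be context.write(new Text(accountId), new Text("sales\t" + amount)).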
Project: accumulo-examples    File: WordCount.java
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(WordCount.class.getName(), args);

  Job job = Job.getInstance(getConf());
  job.setJobName(WordCount.class.getName());
  job.setJarByClass(this.getClass());

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));

  job.setMapperClass(MapClass.class);

  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  opts.setAccumuloConfigs(job);
  job.waitForCompletion(true);
  return 0;
}
Project: envelope    File: TestFileSystemInput.java
@Test
public void readInputFormat() throws Exception {
  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
  paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(CSV_DATA).getPath());
  paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG, TextInputFormat.class.getCanonicalName());
  paramMap.put(FileSystemInput.INPUT_FORMAT_KEY_CONFIG, LongWritable.class.getCanonicalName());
  paramMap.put(FileSystemInput.INPUT_FORMAT_VALUE_CONFIG, Text.class.getCanonicalName());
  paramMap.put("translator.type", DummyInputFormatTranslator.class.getCanonicalName());
  config = ConfigFactory.parseMap(paramMap);

  FileSystemInput formatInput = new FileSystemInput();
  formatInput.configure(config);

  Dataset<Row> results = formatInput.read();

  assertEquals("Invalid number of rows", 4, results.count());
  assertEquals("Invalid first row result", 0L, results.first().getLong(0));
  assertEquals("Invalid first row result", "One,Two,Three,Four", results.first().getString(1));
}
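The assertions follow directly from TextInputFormat's record contract: the key delivered for the first row is the byte offset of the first line, hence 0L, and the value is the raw CSV line "One,Two,Three,Four".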
Project: TopPI    File: TopPIoverHadoop.java
private boolean bigItemCount(String output) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(this.getConf(), "Counting items from " + this.input);
    job.setJarByClass(TopPIoverHadoop.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(this.input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(ItemBigCountingMapper.class);
    job.setReducerClass(ItemBigCountingReducer.class);

    boolean success = job.waitForCompletion(true);

    if (success) {
        Counter rebasingMaxID = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
        this.getConf().setInt(KEY_REBASING_MAX_ID, (int) rebasingMaxID.getValue());
    }

    return success;
}
Project: TopPI    File: TopPIoverHadoop.java
private boolean filterInput(String output, String rebasingMapPath) throws IOException, ClassNotFoundException,
        InterruptedException {
    Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
    job.setJarByClass(TopPIoverHadoop.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(ConcatenatedTransactionsWritable.class);
    DistCache.copyToCache(job, rebasingMapPath);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(FilteringMapper.class);
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true);
}
Project: geomesa-tutorials    File: GDELTIngest.java
private static void runMapReduceJob(String featureName,
                                    Map<String, String> dsConf,
                                    Path mapredCSVFilePath) throws Exception {
    Job job = Job.getInstance(new Configuration());
    job.setJobName("GeoMesa GDELT Ingest");
    job.setJarByClass(GDELTIngest.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(GDELTIngestMapper.class);
    job.setOutputFormatClass(GeoMesaOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SimpleFeature.class);
    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, mapredCSVFilePath);
    GeoMesaOutputFormat.configureDataStore(job, dsConf);
    job.getConfiguration().set(FEATURE_NAME, featureName);

    job.submit();

    if (!job.waitForCompletion(true)) {
        throw new Exception("Job failed");
    }
}
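This is a map-only ingest: with zero reduce tasks, the (Text, SimpleFeature) pairs emitted by GDELTIngestMapper are handed straight to GeoMesaOutputFormat, which writes them into the GeoMesa data store configured via dsConf.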
Project: flink    File: WordCount.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set both the new and the legacy key name
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
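Flink's HadoopInputFormat wrapper exposes Hadoop's (LongWritable, Text) records as Tuple2 elements of a DataSet, so the rest of the pipeline is plain Flink; the separator is set under both the mapreduce.* and the legacy mapred.* key so the job behaves the same regardless of which configuration name the running Hadoop version consults.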
Project: hadoop-2.6.0-cdh5.4.3    File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Project: hadoop-2.6.0-cdh5.4.3    File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
Project: hadoop-2.6.0-cdh5.4.3    File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check");


  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: hadoop-2.6.0-cdh5.4.3    File: FieldSelectionMapper.java
public void setup(Context context) 
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.fieldSeparator = 
    conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
  this.mapOutputKeyValueSpec = 
    conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
  try {
    this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
      context.getInputFormatClass().getCanonicalName());
  } catch (ClassNotFoundException e) {
    throw new IOException("Input format class not found", e);
  }
  allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
    mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
  LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
    mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
    mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
Project: Hanhan-Hadoop-MapReduce    File: WordCountImproved.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountImproved.class);

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}