Java class org.apache.hadoop.mapreduce.lib.map.InverseMapper — example source code

项目:TopPI    文件:TopPIoverHadoop.java   
/**
 * Configures and runs a single-reducer job that feeds key/value-swapped
 * (via {@link InverseMapper}) sequence-file records to
 * {@code ItemBigRebasingReducer} — presumably to compute an item
 * renumbering (TODO confirm against the reducer's implementation).
 *
 * @param input  path of the sequence-file input
 * @param output path the job writes its sequence-file output to
 * @return true when the job completed successfully
 */
private boolean genBigItemMap(String input, String output) throws IOException, ClassNotFoundException,
        InterruptedException {
    Job remapJob = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
    remapJob.setJarByClass(TopPIoverHadoop.class);

    FileInputFormat.addInputPath(remapJob, new Path(input));
    FileOutputFormat.setOutputPath(remapJob, new Path(output));

    // Sequence files in and out; both key and value are integers.
    remapJob.setInputFormatClass(SequenceFileInputFormat.class);
    remapJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    remapJob.setOutputKeyClass(IntWritable.class);
    remapJob.setOutputValueClass(IntWritable.class);

    // Swap keys and values, then funnel everything into one reducer.
    remapJob.setMapperClass(InverseMapper.class);
    remapJob.setReducerClass(ItemBigRebasingReducer.class);
    remapJob.setNumReduceTasks(1);

    return remapJob.waitForCompletion(true);
}
项目:cloudera-homework    文件:WordCoAscendingFrequency.java   
/**
 * Runs the word co-occurrence job sorted by ascending frequency:
 * {@link InverseMapper} swaps each (pair, count) record to (count, pair) so
 * the shuffle sorts by count; no reducer class is set, so the identity
 * reducer writes the records back out.
 *
 * @param args input dir and output dir
 * @return 0 on success, 1 if the job fails, -1 on bad usage
 */
@Override
public int run(String[] args) throws Exception {

  if (args.length != 2) {
    System.out.printf("Usage: WordCo <input dir> <output dir>\n");
    return -1;
  }

  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job job = Job.getInstance(getConf());
  job.setJarByClass(WordCoAscendingFrequency.class);
  job.setJobName("Word Co-Occurrence String Pair");

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(StringPairWritable.class);
  job.setMapperClass(InverseMapper.class);
  // Intentionally no reducer: the identity reducer preserves sorted order.

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
项目:cloudera-homework    文件:WordCoDescendingFrequency.java   
/**
 * Runs the word co-occurrence job sorted by descending frequency:
 * {@link InverseMapper} swaps each (pair, count) record to (count, pair),
 * and a descending comparator reverses the shuffle sort; no reducer class
 * is set, so the identity reducer writes the records back out.
 *
 * @param args input dir and output dir
 * @return 0 on success, 1 if the job fails, -1 on bad usage
 */
@Override
public int run(String[] args) throws Exception {

  if (args.length != 2) {
    System.out.printf("Usage: WordCo <input dir> <output dir>\n");
    return -1;
  }

  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job job = Job.getInstance(getConf());
  job.setJarByClass(WordCoDescendingFrequency.class);
  job.setJobName("Word Co-Occurrence String Pair");

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(StringPairWritable.class);
  job.setMapperClass(InverseMapper.class);
  // Sort keys (the counts) in descending order.
  job.setSortComparatorClass(IntDescendingComparator.class);
  // Intentionally no reducer: the identity reducer preserves sorted order.

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
项目:hadoop    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the single output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:hadoop    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:aliyun-oss-hadoop-fs    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the single output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:aliyun-oss-hadoop-fs    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:big-c    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the single output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:big-c    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the single output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-plus    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 2,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the first output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:hadoop-plus    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:FlexMap    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the single output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:FlexMap    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hops    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a tiny MR job over two lines with {@link KeyFieldBasedComparator}
 * configured by {@code keySpec}, then checks the output order.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must come first in the output, 2 for line2
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
              line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);   // identity reducer
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines must land in the same reducer (the partitioner uses the
      // same key spec for every line), so the single output file holds both;
      // their order tells us which way the comparator sorted.
      String line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      // Bug fix: close in finally so the stream is released even when an
      // assertion above fails (original leaked it on test failure).
      reader.close();
    }
  }
}
项目:hops    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:glusterfs-hadoop-examples    文件:Grep.java   
/**
 * Runs grep as two chained MR jobs: a search job that counts regex matches
 * into a temp dir, then a sort job ({@link InverseMapper} + one reducer)
 * that writes the results ordered by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and optionally a capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Random suffix avoids collisions between concurrent grep runs.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    // Combiner and reducer both sum the per-match counts.
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Bug fix: the original ignored the job status and always returned 0;
    // don't attempt the sort phase if the search job failed.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (word, count) to (count, word) so the shuffle sorts by count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:stats-hdfs    文件:Grep.java   
/**
 * Demo variant of the two-phase grep driver: it overwrites {@code args}
 * with hard-coded HDFS paths and the pattern "d", runs the search job,
 * then sorts the match counts in decreasing order via
 * {@link InverseMapper} and a single reducer.
 *
 * @return 0 on success, 1 if either job fails
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {
    long random = new Random().nextLong();
    log.info("random -> " + random);
    // 3rd element is the grep target pattern. NOTE(review): this overwrite
    // makes the incoming args irrelevant and leaves args.length == 3, so
    // the RegexMapper.GROUP branch below can never fire.
    args = new String[] { String.format(ConfigUtils.HDFS.WORDCOUNT_IN, "word.txt"), String.format(ConfigUtils.HDFS.WORDCOUNT_OUT, random), "d" };

    // Random suffix avoids collisions between concurrent runs.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    Job grepJob = new Job(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        // Combiner and reducer both sum the per-match counts.
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Bug fix: the original ignored the job status and always returned 0;
        // don't attempt the sort phase if the search job failed.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = new Job(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        // Swap (word, count) to (count, word) so the shuffle sorts by count.
        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
}
项目:hadoop-TCP    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Runs a one-map, two-reduce MR job whose map output is sorted with
 * {@link KeyFieldBasedComparator} configured from {@code keySpec}, then
 * verifies the order of the two fixture lines in the first output file.
 *
 * Fixes over the previous version: the reader is closed in a finally block
 * (it leaked whenever an assertion threw), and null lines now fail with an
 * assertion message instead of a NullPointerException.
 *
 * @param keySpec value for mapreduce.partition.keycomparator.options
 * @param expect  1 if line1 must sort first, 2 if line2 must
 */
private void testComparator(String keySpec, int expect) 
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 2,
              line1 +"\n" + line2 + "\n"); 
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
      // Both lines share the same partition key, so they land in the same
      // reducer and must appear in comparator order in this one file.
      String line = reader.readLine();
      assertTrue("output file is empty", line != null);
      if (expect == 1) {
        assertTrue(line.startsWith(line1));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line2));
      }
      line = reader.readLine();
      assertTrue("expected a second output line", line != null);
      if (expect == 1) {
        assertTrue(line.startsWith(line2));
      } else if (expect == 2) {
        assertTrue(line.startsWith(line1));
      }
    } finally {
      reader.close(); // close even when an assertion above throws
    }
  }
}
项目:hadoop-TCP    文件:Grep.java   
/**
 * Two-phase grep driver: phase one ("grep-search") counts regex matches into
 * a temporary sequence-file directory, phase two ("grep-sort") inverts the
 * (match, count) pairs and writes them to the output directory sorted by
 * descending count.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on completion, 2 on bad usage
 * @throws Exception if either MapReduce job fails to submit or run
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Intermediate directory shared by the two jobs; removed in the finally.
  Path tempDir = new Path("grep-temp-"
      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Job searchJob = new Job(conf);
  try {
    searchJob.setJobName("grep-search");
    FileInputFormat.setInputPaths(searchJob, args[0]);

    // RegexMapper emits one record per match; the combiner and reducer
    // both sum the counts.
    searchJob.setMapperClass(RegexMapper.class);
    searchJob.setCombinerClass(LongSumReducer.class);
    searchJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(searchJob, tempDir);
    searchJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    searchJob.setOutputKeyClass(Text.class);
    searchJob.setOutputValueClass(LongWritable.class);
    searchJob.waitForCompletion(true);

    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (match, count) to (count, match) so the count is the sort key.
    sortJob.setMapperClass(InverseMapper.class);
    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    // Descending comparator => most frequent matches first.
    sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortJob.waitForCompletion(true);
  } finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
  return 0;
}
项目:hardfs    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Submits a small MR job whose map output is sorted with
 * {@link KeyFieldBasedComparator} configured from {@code keySpec}, then
 * verifies the order of the two fixture lines in the job output.
 *
 * @param keySpec comparator options (mapreduce.partition.keycomparator.options)
 * @param expect  1 if line1 should sort first, 2 if line2 should
 */
private void testComparator(String keySpec, int expect) 
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  // One mapper, two reducers, input is exactly the two fixture lines.
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 2,
              line1 +"\n" + line2 + "\n"); 
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines (both the lines must end up in the same
    //reducer since the partitioner takes the same key spec for all
    //lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
项目:hardfs    文件:Grep.java   
/**
 * Distributed-grep driver: a search job counts regex matches into a
 * temporary directory, then a sort job inverts the (match, count) pairs and
 * writes them to {@code args[1]} in decreasing order of count.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on completion, 2 on bad usage
 * @throws Exception if either MapReduce job fails to submit or run
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate directory between the two jobs; deleted in the finally.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4)
    conf.set(RegexMapper.GROUP, args[3]);

  Job grepJob = new Job(conf);

  try {

    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    // RegexMapper emits (match, 1); LongSumReducer totals the counts.
    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    grepJob.waitForCompletion(true);

    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap keys and values so the match count becomes the sort key.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    sortJob.waitForCompletion(true);
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
  return 0;
}
项目:hadoop-on-lustre2    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Exercises {@link KeyFieldBasedComparator}: runs a one-map, one-reduce job
 * over the two fixture lines with the given key spec and checks which line
 * sorts first in the output.
 *
 * @param keySpec comparator option string under test
 * @param expect  1 when line1 must come out first, 2 when line2 must
 */
private void testComparator(String keySpec, int expect)
    throws Exception {
  String baseDir = System.getProperty("test.build.data", "/tmp");
  Path inputDir = new Path(baseDir, "test_cmp/in");
  Path outputDir = new Path(baseDir, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  Job job = MapReduceTestUtil.createJob(conf, inputDir, outputDir, 1, 1,
              line1 +"\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // Validate output; nothing to check if the job produced no files.
  Path[] produced = FileUtil.stat2Paths(getFileSystem().listStatus(outputDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (produced.length == 0) {
    return;
  }

  InputStream stream = getFileSystem().open(produced[0]);
  BufferedReader lines = new BufferedReader(new InputStreamReader(stream));
  // Both records share the same partition key, so they land in the same
  // output file and must appear in comparator order.
  String current = lines.readLine();
  if (expect == 1) {
    assertTrue(current.startsWith(line1));
  } else if (expect == 2) {
    assertTrue(current.startsWith(line2));
  }
  current = lines.readLine();
  if (expect == 1) {
    assertTrue(current.startsWith(line2));
  } else if (expect == 2) {
    assertTrue(current.startsWith(line1));
  }
  lines.close();
}
项目:hadoop-on-lustre2    文件:Grep.java   
/**
 * Driver for the distributed-grep example.  Job one ("grep-search") maps
 * regex matches and sums their counts; job two ("grep-sort") flips each
 * (match, count) record and emits a single file ordered by falling count.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on completion, 2 on bad usage
 * @throws Exception if either MapReduce job fails to submit or run
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path tempDir = new Path(
      "grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job countJob = new Job(conf);
  try {
    countJob.setJobName("grep-search");
    FileInputFormat.setInputPaths(countJob, args[0]);

    // Map lines to regex matches; combine and reduce by summing counts.
    countJob.setMapperClass(RegexMapper.class);
    countJob.setCombinerClass(LongSumReducer.class);
    countJob.setReducerClass(LongSumReducer.class);

    // Intermediate (match, count) pairs go to a temporary sequence file.
    FileOutputFormat.setOutputPath(countJob, tempDir);
    countJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(LongWritable.class);
    countJob.waitForCompletion(true);

    Job orderJob = new Job(conf);
    orderJob.setJobName("grep-sort");
    FileInputFormat.setInputPaths(orderJob, tempDir);
    orderJob.setInputFormatClass(SequenceFileInputFormat.class);
    orderJob.setMapperClass(InverseMapper.class);  // (count, match) for sorting
    orderJob.setNumReduceTasks(1);                 // one sorted output file
    FileOutputFormat.setOutputPath(orderJob, new Path(args[1]));
    // Descending order on the count key: most frequent matches first.
    orderJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    orderJob.waitForCompletion(true);
  } finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
  return 0;
}
项目:pmr-common    文件:HawqIngestTool.java   
/**
 * Supplies the mapper for the ingest job.
 *
 * @return {@link InverseMapper}, which emits each input pair with key and
 *         value swapped
 */
@SuppressWarnings("rawtypes")
@Override
protected Class<? extends Mapper> getMapperClass() {
    return InverseMapper.class;
}
项目:mapreduce-fork    文件:Grep.java   
/**
 * Distributed-grep driver: a search job counts regex matches into a
 * temporary directory, then a sort job inverts the (match, count) pairs and
 * writes them to {@code args[1]} in decreasing order of count.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on completion, 2 on bad usage
 * @throws Exception if either MapReduce job fails to submit or run
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate directory between the two jobs; deleted in the finally.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4)
    conf.set(RegexMapper.GROUP, args[3]);

  Job grepJob = new Job(conf);

  try {

    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    // RegexMapper emits (match, 1); LongSumReducer totals the counts.
    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    grepJob.waitForCompletion(true);

    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap keys and values so the match count becomes the sort key.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    sortJob.waitForCompletion(true);
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
  return 0;
}
项目:mapreduce-fork    文件:TestMRKeyFieldBasedComparator.java   
/**
 * Submits a small MR job whose map output is sorted with
 * {@link KeyFieldBasedComparator} configured from {@code keySpec}, then
 * verifies the order of the two fixture lines in the job output.
 *
 * @param keySpec comparator options (mapreduce.partition.keycomparator.options)
 * @param expect  1 if line1 should sort first, 2 if line2 should
 */
private void testComparator(String keySpec, int expect) 
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");

  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");

  // One mapper, two reducers, input is exactly the two fixture lines.
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 2,
              line1 +"\n" + line2 + "\n"); 
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);

  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines (both the lines must end up in the same
    //reducer since the partitioner takes the same key spec for all
    //lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
项目:t4f-data    文件:WordCountTool.java   
/**
 * Runs the two-stage grep pipeline: a search job counts regex matches into
 * a temporary directory, then a sort job inverts the (match, count) pairs
 * and writes them ordered by decreasing count.
 *
 * Change: uses {@code Job.getInstance(conf)} instead of the deprecated
 * {@code new Job(conf)} constructor, matching GrepTool in this project.
 *
 * NOTE(review): the usage string still says "Grep" although this class is
 * WordCountTool — left byte-identical since scripts may match on it.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on completion, 2 on bad usage
 * @throws Exception if either MapReduce job fails to submit or run
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // Intermediate directory between the two jobs; deleted in the finally.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = Job.getInstance(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        // RegexMapper emits (match, 1); LongSumReducer totals the counts.
        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        grepJob.waitForCompletion(true);

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        // Swap keys and values so the match count becomes the sort key.
        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
        LongWritable.DecreasingComparator.class);

        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}
项目:t4f-data    文件:GrepTool.java   
/**
 * Runs the two-stage grep pipeline: a search job counts regex matches into
 * a temporary directory, then a sort job inverts the (match, count) pairs
 * and writes them ordered by decreasing count.
 *
 * Change: removed the unused dead local declaration
 * {@code org.apache.hadoop.util.Tool t;} from the usage branch.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on completion, 2 on bad usage
 * @throws Exception if either MapReduce job fails to submit or run
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // Intermediate directory between the two jobs; deleted in the finally.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);

    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = Job.getInstance(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        // RegexMapper emits (match, 1); LongSumReducer totals the counts.
        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        grepJob.waitForCompletion(true);

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        // Swap keys and values so the match count becomes the sort key.
        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
        LongWritable.DecreasingComparator.class);

        sortJob.waitForCompletion(true);

    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }

    return 0;
}