Java 类org.apache.hadoop.mapreduce.lib.map.RegexMapper 实例源码

项目:big-c    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hadoop-2.6.0-cdh5.4.3    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hadoop-plus    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hops    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hadoop-TCP    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hardfs    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hadoop-on-lustre2    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:mapreduce-fork    文件:Logalyzer.java   
/**
 * Runs a grep-and-sort analysis over the log files in a directory.
 *
 * @param inputFilesDirectory : Directory containing the files to be analyzed.
 * @param outputDirectory : Directory to store analysis (output); when null or
 *        empty, a randomly named "logalyzer_*" subdirectory of the input
 *        directory is used instead.
 * @param grepPattern : Pattern to *grep* for.
 * @param sortColumns : Sort specification for output.
 * @param columnSeparator : Column separator.
 * @throws IOException if the job cannot be submitted or fails.
 */
public void
  doAnalyze(String inputFilesDirectory, String outputDirectory,
            String grepPattern, String sortColumns, String columnSeparator)
  throws IOException
{
  Path grepInput = new Path(inputFilesDirectory);

  // Null-safe emptiness check: the original `outputDirectory.equals("")`
  // threw NullPointerException for a null output directory.
  Path analysisOutput;
  if (outputDirectory == null || outputDirectory.isEmpty()) {
    analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
                              Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  } else {
    analysisOutput = new Path(outputDirectory);
  }

  JobConf grepJob = new JobConf(fsConfig);
  grepJob.setJobName("logalyzer-grep-sort");

  FileInputFormat.setInputPaths(grepJob, grepInput);
  grepJob.setInputFormat(TextInputFormat.class);

  // The mapper reads the pattern and sort options back out of the conf.
  grepJob.setMapperClass(LogRegexMapper.class);
  grepJob.set(RegexMapper.PATTERN, grepPattern);
  grepJob.set(SORT_COLUMNS, sortColumns);
  grepJob.set(COLUMN_SEPARATOR, columnSeparator);

  grepJob.setCombinerClass(LongSumReducer.class);
  grepJob.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(grepJob, analysisOutput);
  grepJob.setOutputFormat(TextOutputFormat.class);
  grepJob.setOutputKeyClass(Text.class);
  grepJob.setOutputValueClass(LongWritable.class);
  grepJob.setOutputKeyComparatorClass(LogComparator.class);

  grepJob.setNumReduceTasks(1);                 // write a single file

  // Blocks until the job completes; throws IOException on job failure.
  JobClient.runJob(grepJob);
}
项目:hadoop    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop    文件:Logalyzer.java   
/**
 * Task setup: compiles, once per task, the regular expression that each
 * input record will be matched against. The pattern string is read from the
 * job configuration under {@link RegexMapper#PATTERN}.
 *
 * @param job the task's job configuration.
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:aliyun-oss-hadoop-fs    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:big-c    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:big-c    文件:Logalyzer.java   
/**
 * Task setup: compiles, once per task, the regular expression that each
 * input record will be matched against. The pattern string is read from the
 * job configuration under {@link RegexMapper#PATTERN}.
 *
 * @param job the task's job configuration.
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:hadoop-2.6.0-cdh5.4.3    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = new Job(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-2.6.0-cdh5.4.3    文件:Logalyzer.java   
/**
 * Task setup: compiles, once per task, the regular expression that each
 * input record will be matched against. The pattern string is read from the
 * job configuration under {@link RegexMapper#PATTERN}.
 *
 * @param job the task's job configuration.
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:hadoop-plus    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = new Job(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-plus    文件:Logalyzer.java   
/**
 * Task setup: compiles, once per task, the regular expression that each
 * input record will be matched against. The pattern string is read from the
 * job configuration under {@link RegexMapper#PATTERN}.
 *
 * @param job the task's job configuration.
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:FlexMap    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hops    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hops    文件:Logalyzer.java   
/**
 * Task setup: compiles, once per task, the regular expression that each
 * input record will be matched against. The pattern string is read from the
 * job configuration under {@link RegexMapper#PATTERN}.
 *
 * @param job the task's job configuration.
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:glusterfs-hadoop-examples    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = new Job(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:stats-hdfs    文件:Grep.java   
/**
 * Runs the two-stage grep demo against fixed HDFS paths.
 *
 * NOTE(review): the incoming {@code args} are overwritten with hard-coded
 * demo values below, so command-line arguments are ignored — confirm this is
 * intended. Since the replacement array always has length 3, the
 * {@code args.length == 4} branch is unreachable here.
 *
 * @param args ignored; replaced with fixed demo arguments.
 * @return 0 on success, 1 if either job fails.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {
  long random = new Random().nextLong();
  log.info("random -> " + random);
  // Third element is the word/pattern to grep for.
  args = new String[] { String.format(ConfigUtils.HDFS.WORDCOUNT_IN, "word.txt"),
      String.format(ConfigUtils.HDFS.WORDCOUNT_OUT, random), "d" };

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = new Job(conf);

  try {
    // Job 1: count occurrences of the pattern.
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass( // sort by decreasing freq
        LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  } finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-TCP    文件:Grep.java   
/**
 * Runs the two-stage grep: a search job that counts regex matches, then a
 * sort job that orders the counts by decreasing frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index.
 * @return 0 on success, 1 if either job fails, 2 on bad usage.
 * @throws Exception if job submission or temp-dir cleanup fails.
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Intermediate output of the search job; deleted in the finally block.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = new Job(conf);

  try {
    // Job 1: count occurrences of the regex (per matched group/string).
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure: the original ignored this result, ran the sort job
    // over missing/partial output, and returned 0 regardless.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    // Job 2: invert (match, count) pairs and sort by decreasing count.
    Job sortJob = new Job(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-TCP    文件:Logalyzer.java   
/**
 * Task setup: compiles, once per task, the regular expression that each
 * input record will be matched against. The pattern string is read from the
 * job configuration under {@link RegexMapper#PATTERN}.
 *
 * @param job the task's job configuration.
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:hardfs    文件:Grep.java   
/**
 * Runs grep as two chained MapReduce jobs: a regex match/count job that
 * writes (match, count) pairs to a temporary directory, then a sort job
 * that inverts the pairs and orders them by descending frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 * @throws Exception if job submission or execution fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Randomized temp dir name so concurrent runs do not collide.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
  Job grepJob = Job.getInstance(conf);

  try {
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure instead of silently returning 0.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (match, count) -> (count, match) so the shuffle sorts on count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    // Always clean up the intermediate output, even on failure.
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hardfs    文件:Logalyzer.java   
/** Compiles the job-configured regex once per task so map() can reuse it. */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:hadoop-on-lustre2    文件:Grep.java   
/**
 * Runs grep as two chained MapReduce jobs: a regex match/count job that
 * writes (match, count) pairs to a temporary directory, then a sort job
 * that inverts the pairs and orders them by descending frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 * @throws Exception if job submission or execution fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Randomized temp dir name so concurrent runs do not collide.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
  Job grepJob = Job.getInstance(conf);

  try {
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure instead of silently returning 0.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (match, count) -> (count, match) so the shuffle sorts on count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    // Always clean up the intermediate output, even on failure.
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:hadoop-on-lustre2    文件:Logalyzer.java   
/** Compiles the job-configured regex once per task so map() can reuse it. */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:mapreduce-fork    文件:Grep.java   
/**
 * Runs grep as two chained MapReduce jobs: a regex match/count job that
 * writes (match, count) pairs to a temporary directory, then a sort job
 * that inverts the pairs and orders them by descending frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 * @throws Exception if job submission or execution fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  // Randomized temp dir name so concurrent runs do not collide.
  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
  Job grepJob = Job.getInstance(conf);

  try {
    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Propagate failure instead of silently returning 0.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    // Swap (match, count) -> (count, match) so the shuffle sorts on count.
    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    // Always clean up the intermediate output, even on failure.
    FileSystem.get(conf).delete(tempDir, true);
  }
}
项目:mapreduce-fork    文件:Logalyzer.java   
/** Compiles the job-configured regex once per task so map() can reuse it. */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
项目:t4f-data    文件:WordCountTool.java   
/**
 * Runs grep as two chained MapReduce jobs: a regex match/count job that
 * writes (match, count) pairs to a temporary directory, then a sort job
 * that inverts the pairs and orders them by descending frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 * @throws Exception if job submission or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    // Randomized temp dir name so concurrent runs do not collide.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
    Job grepJob = Job.getInstance(conf);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Propagate failure instead of silently returning 0.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        // Swap (match, count) -> (count, match) so the shuffle sorts on count.
        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
        LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        // Always clean up the intermediate output, even on failure.
        FileSystem.get(conf).delete(tempDir, true);
    }
}
项目:t4f-data    文件:GrepTool.java   
/**
 * Runs grep as two chained MapReduce jobs: a regex match/count job that
 * writes (match, count) pairs to a temporary directory, then a sort job
 * that inverts the pairs and orders them by descending frequency.
 *
 * @param args inDir, outDir, regex, and an optional capture-group index
 * @return 0 on success, 1 if either job fails, 2 on bad usage
 * @throws Exception if job submission or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        // Removed dead unused local "org.apache.hadoop.util.Tool t;".
        return 2;
    }

    // Randomized temp dir name so concurrent runs do not collide.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);

    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    Job grepJob = Job.getInstance(conf);

    try {
        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Propagate failure instead of silently returning 0.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        // Swap (match, count) -> (count, match) so the shuffle sorts on count.
        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
        LongWritable.DecreasingComparator.class);

        return sortJob.waitForCompletion(true) ? 0 : 1;

    } finally {
        // Always clean up the intermediate output, even on failure.
        FileSystem.get(conf).delete(tempDir, true);
    }
}