/**
 * Launches a MapReduce job that reads the text file {@code /hua/hua.bcp} and writes
 * its output through {@code DBOutputFormat} into the {@code studentinfo} table
 * (columns {@code id} and {@code name}).
 *
 * <p>The process exits with status 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; not used
 * @throws IOException if the job cannot be configured or submitted
 * @throws Exception   on any other failure while running the job
 */
public static void main(String[] args) throws IOException, Exception {
    Configuration conf = new Configuration();

    // The JDBC settings must be written into the Configuration BEFORE the Job is
    // created: the Job works on its own copy of the Configuration, so changes made
    // to 'conf' afterwards would never reach the submitted job.
    DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
            "jdbc:mysql://192.168.3.244:3306/hadoop", "hua", "hadoop");

    Job job = new Job(conf);
    job.setJarByClass(FileToDBMapReduce.class);

    // Input side: plain text file.
    job.setInputFormatClass(TextInputFormat.class);
    Path inputPath = new Path("/hua/hua.bcp");
    FileInputFormat.setInputPaths(job, inputPath);

    // Output side: rows inserted into studentinfo(id, name).
    job.setOutputFormatClass(DBOutputFormat.class);
    DBOutputFormat.setOutput(job, "studentinfo", "id", "name");

    // Mapper.class is used directly (no project-specific map logic); MyReducer
    // is responsible for producing the database records.
    job.setMapperClass(Mapper.class);
    job.setReducerClass(MyReducer.class);

    // Report the job outcome to the caller instead of silently discarding it.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
/**
 * Launches a MapReduce job that reads the {@code id} and {@code name} columns of the
 * {@code studentinfo} table through {@code DBInputFormat} and writes the result
 * to the file-system directory {@code /hua01}.
 *
 * <p>Any existing {@code /hua01} directory is deleted recursively before the job runs.
 * The process exits with status 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments; not used
 * @throws IOException if the job cannot be configured or submitted
 * @throws Exception   on any other failure while running the job
 */
public static void main(String[] args) throws IOException, Exception {
    Configuration conf = new Configuration();

    // Both the classpath entry for the JDBC driver and the connection settings
    // must be stored in the Configuration BEFORE the Job is created: the Job works
    // on its own copy of the Configuration, so later changes to 'conf' would be
    // invisible to the submitted job.
    DistributedCache.addFileToClassPath(
            new Path("/lib/mysql-connector-java-5.1.0-bin.jar"), conf);
    DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
            "jdbc:mysql://192.168.3.244:3306/hadoop", "hua", "hadoop");

    Job job = new Job(conf);
    job.setJarByClass(DBToFileMapReduce.class);

    job.setMapperClass(DBInputMapper.class);
    // Reducer.class is used directly (no project-specific reduce logic).
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(DBInputFormat.class);

    // Register the output directory, then remove any previous copy of it so the
    // job does not fail because the directory already exists.
    Path outputPath = new Path("/hua01");
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);

    // Read studentinfo(id, name), ordered by id, with no WHERE condition.
    String[] fields = { "id", "name" };
    DBInputFormat.setInput(job, StudentinfoRecord.class, "studentinfo", null, "id", fields);

    // Report the job outcome to the caller instead of silently discarding it.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
/**
 * Runs two chained MapReduce jobs: "NGram" builds an n-gram library from text input,
 * and "LModel" reads that library and writes rows into the {@code LanguageModel}
 * database table.
 *
 * <p>Expected arguments:
 * <ol>
 *   <li>{@code args[0]} - input path of the first job</li>
 *   <li>{@code args[1]} - output path of the first job, reused as input of the second</li>
 *   <li>{@code args[2]} - value stored under the {@code GRAM_NUMBER} configuration key</li>
 *   <li>{@code args[3]} - value stored under the {@code THRESHOLD} configuration key</li>
 *   <li>{@code args[4]} - value stored under the {@code TOP_K} configuration key</li>
 * </ol>
 *
 * <p>Exit status: 0 when both jobs succeed, 1 when either job fails, 2 when too few
 * arguments are supplied.
 *
 * @param args command-line arguments as listed above
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws IOException            if a job cannot be configured or submitted
 * @throws InterruptedException   if waiting for a job is interrupted
 */
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 5) {
        System.err.println("Usage: Dispatcher <input path> <n-gram output path> "
                + "<GRAM_NUMBER> <THRESHOLD> <TOP_K>");
        System.exit(2);
    }

    // configure n-gram mapreduce job
    Configuration conf1 = new Configuration();
    conf1.set("textinputformat.record.delimiter", "."); // read a complete sentence as a line
    conf1.set("GRAM_NUMBER", args[2]);

    Job job1 = Job.getInstance(conf1);
    job1.setNumReduceTasks(3);
    job1.setJobName("NGram");
    job1.setJarByClass(Dispatcher.class);

    job1.setMapperClass(NGramBuilder.NGramMapper.class);
    job1.setReducerClass(NGramBuilder.NGramReducer.class);

    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(IntWritable.class);

    job1.setInputFormatClass(TextInputFormat.class);   // default format: reads lines of text files
    job1.setOutputFormatClass(TextOutputFormat.class); // default format: key \t value

    TextInputFormat.setInputPaths(job1, new Path(args[0]));
    TextOutputFormat.setOutputPath(job1, new Path(args[1]));

    // The language model must not start until the n-gram library is completely
    // built; if the first job failed there is nothing valid to read, so stop here.
    if (!job1.waitForCompletion(true)) {
        System.err.println("NGram job failed; LModel job was not started");
        System.exit(1);
    }

    // configure language model mapreduce job
    Configuration conf2 = new Configuration();
    conf2.set("THRESHOLD", args[3]);
    conf2.set("TOP_K", args[4]);

    // Connection settings for the MySQL database; they are placed in conf2 before
    // job2 is created from it.
    // NOTE(review): credentials are hard-coded here.
    DBConfiguration.configureDB(conf2,
            "com.mysql.jdbc.Driver",
            "jdbc:mysql://127.0.0.1:3306/tp",
            "root",
            "123456");

    Job job2 = Job.getInstance(conf2);
    job2.setNumReduceTasks(3);
    job2.setJobName("LModel");
    job2.setJarByClass(Dispatcher.class);

    // Ship the JDBC driver with the job (author's note: putting this jar file into
    // jre/lib/ext is recommended).
    job2.addArchiveToClassPath(new Path("/mysql/mysql-connector-java-5.1.39-bin.jar"));

    job2.setMapperClass(LanguageModel.ModelMapper.class);
    job2.setReducerClass(LanguageModel.ModelReducer.class);

    job2.setMapOutputKeyClass(Text.class);   // Mapper emits different key type than the Reducer
    job2.setMapOutputValueClass(Text.class); // Mapper emits different value type than the Reducer
    job2.setOutputKeyClass(DBOutputWritable.class);
    job2.setOutputValueClass(NullWritable.class);

    job2.setInputFormatClass(TextInputFormat.class);
    job2.setOutputFormatClass(DBOutputFormat.class);

    // The second job consumes the first job's output directory.
    TextInputFormat.setInputPaths(job2, new Path(args[1]));
    DBOutputFormat.setOutput(job2, "LanguageModel",
            new String[] {"starter", "follower", "probability"});

    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
@Override //Usage DBCountPageView [driverClass dburl] public int run(String[] args) throws Exception { String driverClassName = DRIVER_CLASS; String url = DB_URL; if(args.length > 1) { driverClassName = args[0]; url = args[1]; } initialize(driverClassName, url); Configuration conf = getConf(); DBConfiguration.configureDB(conf, driverClassName, url); Job job = new Job(conf); job.setJobName("Count Pageviews of URLs"); job.setJarByClass(DBCountPageView.class); job.setMapperClass(PageviewMapper.class); job.setCombinerClass(LongSumReducer.class); job.setReducerClass(PageviewReducer.class); DBInputFormat.setInput(job, AccessRecord.class, "Access" , null, "url", AccessFieldNames); DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(PageviewRecord.class); job.setOutputValueClass(NullWritable.class); int ret; try { ret = job.waitForCompletion(true) ? 0 : 1; boolean correct = verify(); if(!correct) { throw new RuntimeException("Evaluation was not correct!"); } } finally { shutdown(); } return ret; }
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException { Configuration conf1 = new Configuration(); conf1.set("textinputformat.record.delimiter", "."); conf1.set("noGram", args[2]); //First Job Job job1 = Job.getInstance(conf1); job1.setJobName("NGram"); job1.setJarByClass(Driver.class); job1.setMapperClass(NGramLibraryBuilder.NGramMapper.class); job1.setReducerClass(NGramLibraryBuilder.NGramReducer.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(IntWritable.class); job1.setInputFormatClass(TextInputFormat.class); job1.setOutputFormatClass(TextOutputFormat.class); TextInputFormat.setInputPaths(job1, new Path(args[0])); TextOutputFormat.setOutputPath(job1, new Path(args[1])); job1.waitForCompletion(true); //Second Job Configuration conf2 = new Configuration(); conf2.set("threashold", args[3]); conf2.set("n", args[4]); DBConfiguration.configureDB(conf2, "com.mysql.jdbc.Driver", // driver class "jdbc:mysql://10.101.0.163:8889/test", // "root", // user name "root"); //password Job job2 = Job.getInstance(conf2); job2.setJobName("LanguageModel"); job2.setJarByClass(Driver.class); job2.addArchiveToClassPath(new Path("/mysql/mysql-connector-java-5.1.39-bin.jar")); job2.setMapOutputKeyClass(Text.class); job2.setMapOutputValueClass(Text.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(NullWritable.class); job2.setMapperClass(LanguageModel.Map.class); job2.setReducerClass(LanguageModel.Reduce.class); job2.setInputFormatClass(TextInputFormat.class); job2.setOutputFormatClass(DBOutputFormat.class); DBOutputFormat.setOutput( job2, "output", // output table name new String[] { "starting_phrase", "following_word", "count" } //table columns ); //Path name for this job should match first job's output path name TextInputFormat.setInputPaths(job2, new Path(args[1])); System.exit(job2.waitForCompletion(true)?0:1); }
@Override //Usage DBCountPageView [driverClass dburl] public int run(String[] args) throws Exception { String driverClassName = DRIVER_CLASS; String url = DB_URL; if(args.length > 1) { driverClassName = args[0]; url = args[1]; } initialize(driverClassName, url); Configuration conf = getConf(); DBConfiguration.configureDB(conf, driverClassName, url); Job job = Job.getInstance(conf); job.setJobName("Count Pageviews of URLs"); job.setJarByClass(DBCountPageView.class); job.setMapperClass(PageviewMapper.class); job.setCombinerClass(LongSumReducer.class); job.setReducerClass(PageviewReducer.class); DBInputFormat.setInput(job, AccessRecord.class, "HAccess" , null, "url", AccessFieldNames); DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(PageviewRecord.class); job.setOutputValueClass(NullWritable.class); int ret; try { ret = job.waitForCompletion(true) ? 0 : 1; boolean correct = verify(); if(!correct) { throw new RuntimeException("Evaluation was not correct!"); } } finally { shutdown(); } return ret; }