/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 *   default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions =
        MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  initCredentials(job);
}
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 *   default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions = MetaReader.getRegionCount(HBaseConfiguration.create(job), table);
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  initCredentials(job);
}
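A minimal call-site sketch for the two initTableReduceJob variants above (a fragment in the old org.apache.hadoop.hbase.mapred API; the table name is a placeholder, imports and map-side setup omitted):

// Illustrative driver fragment, not the library's own example.
JobConf job = new JobConf(HBaseConfiguration.create());
job.setJobName("write-to-hbase");
// IdentityTableReduce simply forwards the Puts it receives to the output table.
TableMapReduceUtil.initTableReduceJob(
    "outputTable",                 // placeholder table name
    IdentityTableReduce.class,
    job,
    HRegionPartitioner.class,      // or null to keep the default partitioner
    true);                         // ship HBase jars via the distributed cache
JobClient.runJob(job);

In practice this is paired with initTableMapJob (below) or another map-side setup that produces (ImmutableBytesWritable, Put) pairs for the reducer.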
@Override
public void export(RpcController controller, ExportProtos.ExportRequest request,
    RpcCallback<ExportProtos.ExportResponse> done) {
  Region region = env.getRegion();
  Configuration conf = HBaseConfiguration.create(env.getConfiguration());
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      ResultSerialization.class.getName());
  try {
    Scan scan = validateKey(region.getRegionInfo(), request);
    Token userToken = null;
    if (userProvider.isHadoopSecurityEnabled() && !request.hasFsToken()) {
      LOG.warn("Hadoop security is enabled, but no user token was found in the request");
    } else if (userProvider.isHadoopSecurityEnabled()) {
      userToken = new Token(request.getFsToken().getIdentifier().toByteArray(),
          request.getFsToken().getPassword().toByteArray(),
          new Text(request.getFsToken().getKind()),
          new Text(request.getFsToken().getService()));
    }
    ExportProtos.ExportResponse response = processData(region, conf, userProvider, scan,
        userToken, getWriterOptions(conf, region.getRegionInfo(), request));
    done.run(response);
  } catch (IOException e) {
    CoprocessorRpcUtils.setControllerException(controller, e);
    LOG.error(e.toString(), e);
  }
}
/**
 * Use this before submitting a TableMap job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The table name to read from.
 * @param columns  The columns to scan.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job configuration to adjust.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param inputFormat  The input format class to use.
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
    Class<? extends InputFormat> inputFormat) {

  job.setInputFormat(inputFormat);
  job.setMapOutputValueClass(outputValueClass);
  job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  FileInputFormat.addInputPaths(job, table);
  job.set(TableInputFormat.COLUMN_LIST, columns);
  if (addDependencyJars) {
    try {
      addDependencyJars(job);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
  try {
    initCredentials(job);
  } catch (IOException ioe) {
    // just spit out the stack trace? really?
    ioe.printStackTrace();
  }
}
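A hedged call-site sketch for this overload and the shorter one further below (a fragment in the old mapred API; the table name and column list are placeholders, imports omitted):

// Illustrative only: passes an explicit input format rather than relying on the
// overload that defaults to TableInputFormat.
JobConf job = new JobConf(HBaseConfiguration.create());
TableMapReduceUtil.initTableMapJob(
    "inputTable",                      // placeholder table name
    "info:qual1 info:qual2",           // space-delimited family:qualifier list
    IdentityTableMap.class,            // emits (ImmutableBytesWritable, Result) unchanged
    ImmutableBytesWritable.class,
    Result.class,
    job,
    true,                              // addDependencyJars
    TableInputFormat.class);           // explicit input format for this overload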
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    usage("Wrong number of arguments: " + otherArgs.length);
    return -1;
  }
  String inputVersionString = System.getProperty(ResultSerialization.IMPORT_FORMAT_VER);
  if (inputVersionString != null) {
    getConf().set(ResultSerialization.IMPORT_FORMAT_VER, inputVersionString);
  }
  Job job = createSubmittableJob(getConf(), otherArgs);
  return (job.waitForCompletion(true) ? 0 : 1);
}
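A sketch of how the export-format version typically reaches this run() method, assuming the surrounding class is HBase's Import tool (it implements Tool); the table name, path, and version value are placeholders:

// Illustrative launcher: the system property set here is the one read by run() above
// and copied into the job configuration before the import job is created.
System.setProperty(ResultSerialization.IMPORT_FORMAT_VER, "0.94");
int exitCode = ToolRunner.run(HBaseConfiguration.create(), new Import(),
    new String[] { "myTable", "/hbase-export/myTable" });
System.exit(exitCode);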
/**
 * Use this before submitting a TableMap job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The table name to read from.
 * @param columns  The columns to scan.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job configuration to adjust.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {

  job.setInputFormat(TableInputFormat.class);
  job.setMapOutputValueClass(outputValueClass);
  job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  FileInputFormat.addInputPaths(job, table);
  job.set(TableInputFormat.COLUMN_LIST, columns);
  if (addDependencyJars) {
    try {
      addDependencyJars(job);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
  try {
    initCredentials(job);
  } catch (IOException ioe) {
    // just spit out the stack trace? really?
    ioe.printStackTrace();
  }
}
/**
 * Read sequence file from HDFS created by HBase export.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.InputFileOption.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }

  Path inputFile = new Path(cli.getArgValueAsString(CliCommonOpts.InputFileOption.INPUT));

  Configuration conf = super.getConf();
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      ResultSerialization.class.getName());

  SequenceFile.Reader reader =
      new SequenceFile.Reader(conf, SequenceFile.Reader.file(inputFile));
  HBaseScanAvroStock.AvroStockReader stockReader =
      new HBaseScanAvroStock.AvroStockReader();
  try {
    ImmutableBytesWritable key = new ImmutableBytesWritable();
    Result value = new Result();
    while (reader.next(key)) {
      value = (Result) reader.getCurrentValue(value);
      Stock stock = stockReader.decode(value.getValue(
          HBaseWriter.STOCK_DETAILS_COLUMN_FAMILY_AS_BYTES,
          HBaseWriter.STOCK_COLUMN_QUALIFIER_AS_BYTES));
      System.out.println(new String(key.get()) + ": " + ToStringBuilder
          .reflectionToString(stock, ToStringStyle.SIMPLE_STYLE));
    }
  } finally {
    reader.close();
  }
  return 0;
}
private void configure(Configuration conf) {
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
}
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls)
    throws IOException, UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(cls);

  // Based on the configured map output class, set the correct reducer to properly
  // sort the incoming values.
  // TODO it would be nice to pick one or the other of these formats.
  if (KeyValue.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(KeyValueSortReducer.class);
  } else if (Put.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(PutSortReducer.class);
  } else if (Text.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(TextSortReducer.class);
  } else {
    LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
  }

  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  // Use table's region boundaries for TOP split points.
  LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
  List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
  LOG.info("Configuring " + startKeys.size() + " reduce partitions "
      + "to match current region count");
  job.setNumReduceTasks(startKeys.size());

  configurePartitioner(job, startKeys);
  // Set compression algorithms based on column families
  configureCompression(conf, tableDescriptor);
  configureBloomType(tableDescriptor, conf);
  configureBlockSize(tableDescriptor, conf);
  configureDataBlockEncoding(tableDescriptor, conf);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
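A hedged sketch of how a bulk-load driver usually reaches this configuration path, assuming the public HFileOutputFormat2.configureIncrementalLoad(Job, Table, RegionLocator) overload delegates to the method above; the mapper class, table name, and paths are placeholders, imports omitted:

// Illustrative bulk-load preparation job, not the library's own example.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "prepare-hfiles");
job.setJarByClass(MyBulkLoadMapper.class);           // hypothetical mapper class
job.setMapperClass(MyBulkLoadMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);               // selects PutSortReducer above
FileInputFormat.addInputPath(job, new Path("/input/data"));
FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles"));
try (Connection connection = ConnectionFactory.createConnection(conf);
     Table table = connection.getTable(TableName.valueOf("myTable"));
     RegionLocator locator = connection.getRegionLocator(TableName.valueOf("myTable"))) {
  HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
}
boolean ok = job.waitForCompletion(true);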