public void run() throws Exception {
  String tableName = "contacts";
  Configuration config = HBaseConfiguration.create();
  Scan scan = new Scan();
  scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false);  // don't set to true for MR jobs
  config.set(TableInputFormat.SCAN, convertScanToString(scan));
  config.set(TableInputFormat.INPUT_TABLE, tableName);
  Job job = new Job(config, "index builder");
  job.setJarByClass(JobSubmitter.class);
  job.setMapperClass(IndexMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
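Many of these snippets pass the Scan through the job Configuration via a convertScanToString helper. A minimal sketch of that helper, modeled on the package-private TableMapReduceUtil.convertScanToString from protobuf-based HBase releases (the exact serialization varies by HBase version, so treat the details as an assumption):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;

// Sketch only: serialize a Scan so it can ride in the Configuration as a String.
private static String convertScanToString(Scan scan) throws IOException {
  ClientProtos.Scan proto = ProtobufUtil.toScan(scan); // protobuf form of the Scan
  return Base64.encodeBytes(proto.toByteArray());      // Base64 for safe String transport
}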
@Override
public JavaRDD<ObjectValue> evaluate(Transformation transformation) {
  final SparkTransformationEvaluator evaluator = new SparkTransformationEvaluator(transformation);
  JavaSparkContext sc = NotaQL.SparkFactory.getSparkContext();
  final Configuration conf = createConf();
  conf.set(TableInputFormat.INPUT_TABLE, tableId);
  final JavaPairRDD<ImmutableBytesWritable, Result> inputRDD = sc.newAPIHadoopRDD(
      conf, TableInputFormat.class, ImmutableBytesWritable.class,
      org.apache.hadoop.hbase.client.Result.class);
  // convert all rows in the RDD to the inner format
  final JavaRDD<Value> converted = inputRDD.map(t -> ValueConverter.convertToNotaQL(t._2));
  // filter out the rows that do not satisfy the input predicate
  final JavaRDD<Value> filtered = converted.filter(
      v -> transformation.satisfiesInPredicate((ObjectValue) v));
  // process all input
  return evaluator.process(filtered);
}
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
  TableInput tableInput = (TableInput) annotation;
  // Base setup of the table mapper job
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  try {
    // Add dependencies
    TableMapReduceUtil.addDependencyJars(job);
    String tableName = getTableName(tableInput);
    Scan scan = getScan(tableInput);
    job.setInputFormatClass(TableInputFormat.class);
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  } catch (IOException e) {
    throw new ToolException(e);
  }
}
@Test
public void testProcessDefaults() {
  try {
    Annotation annotation = setupDriver(new TableDriverDefaults());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo(TEST_INPUT));
    assertThat(conf.get(TableInputFormat.SCAN),
        equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test
public void testProcessExplicitTable() {
  try {
    Annotation annotation = setupDriver(new TableDriverExplicitTable());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("my_table"));
    assertThat(conf.get(TableInputFormat.SCAN),
        equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test
public void testProcessCustomScan() {
  try {
    Annotation annotation = setupDriver(new TableDriverWithScan());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.my_table"));
    assertThat(conf.get(TableInputFormat.SCAN),
        equalTo("AgAAAAAAAv//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test
public void testConditionalName() {
  try {
    Annotation annotation = setupDriver(new TableDriverNameExpr());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("myTable"));
    TableDriverNameExpr.PREFIX = "test";
    handler.process(annotation, job, null);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.myTable"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
/**
 * Handles initializing this class with objects specific to it (i.e., the parser). Common
 * initialization that might be leveraged by a subclass is done in <code>doSetup</code>. Hence a
 * subclass may choose to override this method and call <code>doSetup</code> as well before
 * handling its own custom params.
 * @param context the mapper context
 */
@Override
protected void setup(Context context) throws IOException {
  doSetup(context);
  Configuration conf = context.getConfiguration();
  parser = new TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
  if (parser.getRowKeyColumnIndex() == -1) {
    throw new RuntimeException("No row key column specified");
  }
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  HTable hTable = null;
  try {
    hTable = new HTable(conf, tableName);
    this.startKeys = hTable.getStartKeys();
    byte[] indexBytes = hTable.getTableDescriptor().getValue(Constants.INDEX_SPEC_KEY);
    if (indexBytes != null) {
      TableIndices tableIndices = new TableIndices();
      tableIndices.readFields(indexBytes);
      this.indices = tableIndices.getIndices();
    }
  } finally {
    if (hTable != null) {
      hTable.close();
    }
  }
}
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  conf.set(TableInputFormat.INPUT_TABLE, table);
  conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
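For context, a rough usage sketch of the overload above; the table name, ExampleMapper, and the output classes are placeholders, not names from the original source:

// Hypothetical driver wiring for the initTableMapperJob overload above.
// ExampleMapper stands in for any TableMapper<ImmutableBytesWritable, Text>.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "example-table-scan");
Scan scan = new Scan();
scan.setCaching(500);        // fetch more rows per RPC; the default of 1 is slow for MR
scan.setCacheBlocks(false);  // avoid polluting the region server block cache
initTableMapperJob("exampleTable", scan, ExampleMapper.class,
    ImmutableBytesWritable.class, Text.class, job,
    true, TableInputFormat.class);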
private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
  Scan s = new Scan();
  // Set Scan Versions
  s.setMaxVersions(Integer.MAX_VALUE);
  s.setCacheBlocks(false);
  // Set Scan Column Family
  if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
    s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
  }
  // Set RowFilter or Prefix Filter if applicable.
  Filter rowFilter = getRowFilter(args);
  if (rowFilter != null) {
    LOG.info("Setting Row Filter for counter.");
    s.setFilter(rowFilter);
  }
  // Set TimeRange if defined
  long[] timeRange = getTimeRange(args);
  if (timeRange != null) {
    LOG.info("Setting TimeRange for counter.");
    s.setTimeRange(timeRange[0], timeRange[1]);
  }
  LOG.warn("Got the Scan: " + s);
  return s;
}
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or "
        + "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println(" Note: -D properties will be applied to the conf used. ");
    System.err.println(" Additionally, the following SCAN properties can be specified");
    System.err.println(" to get fine-grained control on what is counted.");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report "
        + "separator string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell "
        + "counter count operation to a limited subset of rows from the table based on regex or "
        + "prefix pattern.");
    return -1;
  }
  Job job = createSubmittableJob(getConf(), otherArgs);
  return (job.waitForCompletion(true) ? 0 : 1);
}
private static Configuration initializeHBaseConfig() {
  Configuration hbaseConfig = HBaseConfiguration.create();
  hbaseConfig.set(TableInputFormat.INPUT_TABLE, Consts.TARGET_TABLE);
  hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
  hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
  hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
  hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");
  // the standard key is hbase.cluster.distributed (original had "hbase.distributed.cluster", which is a no-op)
  hbaseConfig.set("hbase.cluster.distributed", "true");
  hbaseConfig.set("hbase.zookeeper.quorum", Consts.ZOOKKEEPER_QUORUM);
  hbaseConfig.set("mapreduce.job.maps", "4");
  hbaseConfig.set("mapred.map.tasks", "4");
  hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");
  return hbaseConfig;
}
private static Configuration initializeHBaseConfig() {
  Configuration hbaseConfig = HBaseConfiguration.create();
  hbaseConfig.set(TableInputFormat.INPUT_TABLE, TARGET_TABLE);
  hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
  hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
  hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
  hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");
  // the standard key is hbase.cluster.distributed (original had "hbase.distributed.cluster", which is a no-op)
  hbaseConfig.set("hbase.cluster.distributed", "true");
  hbaseConfig.set("hbase.zookeeper.quorum", ZOOKKEEPER_QUORUM);
  hbaseConfig.set("mapreduce.job.maps", "4");
  hbaseConfig.set("mapred.map.tasks", "4");
  hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");
  return hbaseConfig;
}
/**
 * Prepares a MapReduce job.
 * @param tn The current table name.
 * @param familyName The current family name.
 * @param scan The current scan.
 * @param conf The current configuration.
 * @return A MapReduce job.
 * @throws IOException if the job cannot be set up
 */
private Job prepareJob(TableName tn, String familyName, Scan scan, Configuration conf)
    throws IOException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(SweepMapper.class);
  TableMapReduceUtil.initTableMapperJob(tn.getNameAsString(), scan, SweepMapper.class,
      Text.class, Writable.class, job);
  job.setInputFormatClass(TableInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(KeyValue.class);
  job.setReducerClass(SweepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  String jobName = getCustomJobName(this.getClass().getSimpleName(), tn.getNameAsString(),
      familyName);
  job.setJobName(jobName);
  if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
    String fileLoc = conf.get(CREDENTIALS_LOCATION);
    Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
    job.getCredentials().addAll(cred);
  }
  return job;
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table The Splice table name to read from.
 * @param scan The scan instance with the columns, time range, etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *   carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param inputFormatClass The input format class to use.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends Mapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Object> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  if (mapper != null) {
    job.setMapperClass(mapper);
  }
  job.getConfiguration().set(MRConstants.SPLICE_INPUT_TABLE_NAME, table);
  job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
}
public SMRecordReaderImpl getRecordReader(InputSplit split, Configuration config)
    throws IOException, InterruptedException {
  config.addResource(conf);
  if (LOG.isDebugEnabled()) {
    SpliceLogUtils.debug(LOG, "getRecordReader with table=%s, inputTable=%s, conglomerate=%s",
        table, config.get(TableInputFormat.INPUT_TABLE),
        config.get(MRConstants.SPLICE_INPUT_CONGLOMERATE));
  }
  rr = new SMRecordReaderImpl(conf);
  if (table == null) {
    TableName tableInfo = TableName.valueOf(config.get(TableInputFormat.INPUT_TABLE));
    PartitionFactory tableFactory = SIDriver.driver().getTableFactory();
    table = ((ClientPartition) tableFactory.getTable(tableInfo)).unwrapDelegate();
  }
  rr.setHTable(table);
  if (LOG.isDebugEnabled()) {
    SpliceLogUtils.debug(LOG, "returning record reader");
  }
  return rr;
}
@Override
public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) {
  // a hack for MultiInputFormat to see that there is a child format
  FileInputFormat.setInputPaths(conf, getPath());
  if (quorumNames != null) {
    conf.set("hbase.zookeeper.quorum", quorumNames);
  }
  LOG.debug("sourcing from table: {}", tableName);
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  if (null != base64Scan) {
    conf.set(TableInputFormat.SCAN, base64Scan);
  }
  super.sourceConfInit(process, conf);
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.USERTABLE_COLUMN_RATING);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      RatingExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer
  job.setReducerClass(RatingExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.RECOMMENDATION_COLUMN);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      RecommendationsExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer (identity)
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.COLLECTION_TABLE_COLUMN_INTR);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      CollectionExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer
  job.setReducerClass(CollectionExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.USERTABLE_COLUMN_FOAF);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      UserExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer
  job.setReducerClass(UserExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
public static List<Put> getIndexPut(Put userPut, Configuration conf) throws IOException {
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  IndexedHTableDescriptor tableDescriptor = getTableDescriptor(tableName, conf);
  List<Put> indexPuts = new ArrayList<Put>();
  if (tableDescriptor != null) {
    List<IndexSpecification> indices = tableDescriptor.getIndices();
    for (IndexSpecification index : indices) {
      byte[] startkey = getStartKey(conf, tableName, userPut.getRow());
      Put indexPut = IndexUtils.prepareIndexPut(userPut, index, startkey);
      if (indexPut != null) {
        indexPuts.add(indexPut);
      }
    }
  }
  return indexPuts;
}
public static List<Delete> getIndexDelete(Delete userDelete, Configuration conf)
    throws IOException {
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  IndexedHTableDescriptor tableDescriptor = getTableDescriptor(tableName, conf);
  List<Delete> indexDeletes = new ArrayList<Delete>();
  if (tableDescriptor != null) {
    List<IndexSpecification> indices = tableDescriptor.getIndices();
    for (IndexSpecification index : indices) {
      byte[] startkey = getStartKey(conf, tableName, userDelete.getRow());
      Delete indexDelete = IndexUtils.prepareIndexDelete(userDelete, index, startkey);
      if (indexDelete != null) {
        indexDeletes.add(indexDelete);
      }
    }
  }
  return indexDeletes;
}
Job createSubmittableJob(final String[] args) throws IOException {
  Configuration configFromArgs = parseArguments(args);
  if (configFromArgs == null || sourceTableNameString == null) {
    return null;
  }
  getConf().addResource(configFromArgs);
  getConf().setBoolean(Repository.MAP_SPECULATIVE_CONF_KEY, true); // no redundant processing
  Job job = Job.getInstance(getConf(),
      getConf().get(Repository.JOB_NAME_CONF_KEY, sourceTableNameString));
  TableMapReduceUtil.addDependencyJars(job);
  Scan scan = new Scan();
  // note that the user can override scan row-caching by setting TableInputFormat.SCAN_CACHEDROWS
  scan.setCaching(getConf().getInt(TableInputFormat.SCAN_CACHEDROWS, 500));
  scan.setCacheBlocks(false); // should be false for MapReduce jobs
  if (!verboseReport && !reportType.equals(ReportType.VALUE)) {
    scan.setFilter(new KeyOnlyFilter(true));
  }
  if (includeAllCells) {
    scan.setMaxVersions();
  }
  if (sourceColFamily != null) {
    scan.addFamily(sourceColFamily);
  }
  TableMapReduceUtil.initTableMapperJob(sourceTableNameString, scan,
      ColumnInvalidityReportMapper.class,
      null, // mapper output key is null
      null, // mapper output value is null
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // no Mapper output, no Reducer
  return job;
}
Job createSubmittableJob(final String[] args) throws IOException {
  if (!parseArguments(args)) {
    return null;
  }
  getConf().setBoolean(Repository.MAP_SPECULATIVE_CONF_KEY, true); // no redundant processing
  getConf().set(Repository.TABLE_NAME_CONF_KEY, sourceTableNameString);
  Job job = Job.getInstance(getConf(),
      getConf().get(Repository.JOB_NAME_CONF_KEY, sourceTableNameString));
  TableMapReduceUtil.addDependencyJars(job);
  Scan scan = new Scan();
  // note that the user can override scan row-caching by setting TableInputFormat.SCAN_CACHEDROWS
  scan.setCaching(getConf().getInt(TableInputFormat.SCAN_CACHEDROWS, 500));
  scan.setCacheBlocks(false); // should be false for scanning in MapReduce jobs
  scan.setFilter(new KeyOnlyFilter(true));
  if (includeAllCells) {
    scan.setMaxVersions();
  }
  TableMapReduceUtil.initTableMapperJob(sourceTableNameString, scan,
      ColumnDiscoveryMapper.class,
      null, // mapper output key is null
      null, // mapper output value is null
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // no Mapper output, no Reducer
  return job;
}
@Override
public void setConf(final Configuration config) {
  super.setConf(config);
  //config.set(TableInputFormat.SCAN_COLUMN_FAMILY, Backend.EDGESTORE_NAME);
  config.set(TableInputFormat.INPUT_TABLE, inputConf.get(HBaseStoreManager.HBASE_TABLE));
  //config.set(HConstants.ZOOKEEPER_QUORUM, config.get(TITAN_HADOOP_GRAPH_INPUT_TITAN_STORAGE_HOSTNAME));
  config.set(HConstants.ZOOKEEPER_QUORUM,
      inputConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
  //if (basicConf.get(TITAN_HADOOP_GRAPH_INPUT_TITAN_STORAGE_PORT, null) != null)
  if (inputConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT,
        String.valueOf(inputConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
  }
  config.set("autotype", "none");
  log.debug("hbase.security.authentication={}", config.get("hbase.security.authentication"));
  Scan scanner = new Scan();
  // TODO the mapping is private in HBaseStoreManager and leaks here -- replace String
  // database/CF names with an enum where each value has both a short and long name
  if (inputConf.get(HBaseStoreManager.SHORT_CF_NAMES)) {
    scanner.addFamily("e".getBytes());
    edgestoreFamily = Bytes.toBytes("e");
  } else {
    scanner.addFamily(Backend.EDGESTORE_NAME.getBytes());
    edgestoreFamily = Bytes.toBytes(Backend.EDGESTORE_NAME);
  }
  //scanner.setFilter(getColumnFilter(titanSetup.inputSlice(this.vertexQuery)));
  scanner.setFilter(getColumnFilter(TitanHadoopSetupCommon.getDefaultSliceQuery()));
  // TODO (minor): should we set other options in
  // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html for optimization?
  // convertScanToString is package-private in TableMapReduceUtil, so invoke it reflectively
  Method converter;
  try {
    converter = TableMapReduceUtil.class.getDeclaredMethod("convertScanToString", Scan.class);
    converter.setAccessible(true);
    config.set(TableInputFormat.SCAN, (String) converter.invoke(null, scanner));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  this.tableInputFormat.setConf(config);
}
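If reflection into the package-private helper is undesirable, the same Base64 string can be produced directly with the protobuf conversion shown in the convertScanToString sketch near the top of this section, again assuming a protobuf-based HBase version:

// Hypothetical reflection-free alternative to the converter call above.
config.set(TableInputFormat.SCAN,
    Base64.encodeBytes(ProtobufUtil.toScan(scanner).toByteArray()));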
@Override
public InputFormat getInputFormat() {
  TableInputFormat inputFormat = new HBaseTableIFBuilder()
      .withLimit(limit_)
      .withGt(gt_)
      .withGte(gte_)
      .withLt(lt_)
      .withLte(lte_)
      .withConf(m_conf)
      .build();
  return inputFormat;
}
@Override
public void setLocation(String location, Job job) throws IOException {
  Properties udfProps = getUDFProperties();
  job.getConfiguration().setBoolean("pig.noSplitCombination", true);
  initialiseHBaseClassLoaderResources(job);
  m_conf = initializeLocalJobConfig(job);
  String delegationTokenSet = udfProps.getProperty(HBASE_TOKEN_SET);
  if (delegationTokenSet == null) {
    addHBaseDelegationToken(m_conf, job);
    udfProps.setProperty(HBASE_TOKEN_SET, "true");
  }
  String tablename = location;
  if (location.startsWith("hbase://")) {
    tablename = location.substring(8);
  }
  m_conf.set(TableInputFormat.INPUT_TABLE, tablename);
  String projectedFields = udfProps.getProperty(projectedFieldsName());
  if (projectedFields != null) {
    // update columnInfo_
    pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
  }
  addFiltersWithoutColumnPrefix(columnInfo_);
  if (requiredFieldList != null) {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
        new String[] { contextSignature });
    p.setProperty(contextSignature + "_projectedFields",
        ObjectSerializer.serialize(requiredFieldList));
  }
  m_conf.set(TableInputFormat.SCAN, convertScanToString(scan));
}
@Override
public InputFormat getInputFormat() {
  TableInputFormat inputFormat = new HBaseTableIFBuilder()
      .withLimit(limit_)
      .withGt(gt_)
      .withGte(gte_)
      .withLt(lt_)
      .withLte(lte_)
      .withConf(m_conf)
      .build();
  inputFormat.setScan(scan);
  return inputFormat;
}
@Override
public void setLocation(String location, Job job) throws IOException {
  Properties udfProps = getUDFProperties();
  job.getConfiguration().setBoolean("pig.noSplitCombination", true);
  m_conf = initializeLocalJobConfig(job);
  String delegationTokenSet = udfProps.getProperty(HBASE_TOKEN_SET);
  if (delegationTokenSet == null) {
    addHBaseDelegationToken(m_conf, job);
    udfProps.setProperty(HBASE_TOKEN_SET, "true");
  }
  String tablename = location;
  if (location.startsWith("hbase://")) {
    tablename = location.substring(8);
  }
  m_conf.set(TableInputFormat.INPUT_TABLE, tablename);
  String projectedFields = udfProps.getProperty(projectedFieldsName());
  if (projectedFields != null) {
    // update columnInfo_
    pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
  }
  addFiltersWithoutColumnPrefix(columnInfo_);
  if (requiredFieldList != null) {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
        new String[] { contextSignature });
    p.setProperty(contextSignature + "_projectedFields",
        ObjectSerializer.serialize(requiredFieldList));
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  this.conf = context.getConfiguration();
  this.fs = FileSystem.get(conf);
  // the MOB_COMPACTION_DELAY is ONE_DAY by default. Its value is only changed when testing.
  mobCompactionDelay = conf.getLong(SweepJob.MOB_COMPACTION_DELAY, SweepJob.ONE_DAY);
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  String familyName = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY);
  this.familyDir = MobUtils.getMobFamilyPath(conf, TableName.valueOf(tableName), familyName);
  HBaseAdmin admin = new HBaseAdmin(this.conf);
  try {
    family = admin.getTableDescriptor(Bytes.toBytes(tableName)).getFamily(
        Bytes.toBytes(familyName));
  } finally {
    try {
      admin.close();
    } catch (IOException e) {
      LOG.warn("Fail to close the HBaseAdmin", e);
    }
  }
  // disable the block cache (this must be setFloat, not getFloat, for the tiny size to take effect)
  Configuration copyOfConf = new Configuration(conf);
  copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.00001f);
  this.cacheConfig = new CacheConfig(copyOfConf);
  table = new HTable(this.conf, Bytes.toBytes(tableName));
  table.setAutoFlush(false, false);
  table.setWriteBufferSize(1 * 1024 * 1024); // 1MB
  memstore = new MemStoreWrapper(context, fs, table, family, new MemStore(), cacheConfig);
  // The start time of the sweep tool.
  // Only the mob files whose creation time is older than startTime-oneDay will be handled by the
  // reducer since it brings inconsistency to handle the latest mob files.
  this.compactionBegin = conf.getLong(MobConstants.MOB_COMPACTION_START_DATE, 0);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  mappersForOneRegion = conf.getInt(MAPPERS_FOR_ONE_REGION, mappersForOneRegion);
  String vertexTableName = conf.get(TableInputFormat.INPUT_TABLE);
  if (null == vertexTableName || "".equals(vertexTableName)) {
    throw new IllegalArgumentException(TableInputFormat.INPUT_TABLE
        + " shall not be empty or null");
  }
  vertexTable = new HTable(conf, vertexTableName);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  bypassKeys = conf.getInt(BY_PASS_KEYS, bypassKeys);
  String vertexTableName = conf.get(TableInputFormat.INPUT_TABLE);
  if (null == vertexTableName || "".equals(vertexTableName)) {
    throw new IllegalArgumentException(TableInputFormat.INPUT_TABLE
        + " shall not be empty or null");
  }
  vertexTable = new HTable(conf, vertexTableName);
}
public static Job createSubmittableJob(Configuration conf, String tableName, String outputPath)
    throws IOException {
  Validate.notEmpty(tableName, "tableName must not be empty");
  Validate.notEmpty(outputPath, "outputPath must not be empty");
  long timestamp = System.currentTimeMillis();
  Job job = null;
  String jobName = null;
  try {
    jobName = "GetNoColumnsRows_" + timestamp;
    LOGGER.info("start to run job:" + jobName);
    job = new Job(conf, jobName);
    job.setJarByClass(GetNoColumnsRows.class);
    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, Mapper.class, Text.class,
        NullWritable.class, job, true, TableInputFormat.class);
    // map-only job
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
  } catch (IOException e) {
    LOGGER.error("run " + jobName + " failed", e);
    throw e;
  }
  return job;
}
private static Job createInitialPageRankJob(Configuration conf, String outputBasePath,
    Class<? extends TableInputFormat> tableInputFormat) throws IOException {
  String tableName = conf.get(HBaseGraphConstants.HBASE_GRAPH_TABLE_VERTEX_NAME_KEY);
  long timestamp = System.currentTimeMillis();
  Job job = null;
  String jobName = null;
  try {
    jobName = "CalculateInitPageRank_" + timestamp;
    LOGGER.info("start to run job:" + jobName);
    job = new Job(conf, jobName);
    job.setJarByClass(Driver.class);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CalculateInitPageRankMapper.class,
        Text.class, DoubleWritable.class, job, true, tableInputFormat);
    job.setReducerClass(CalculatePageRankReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    String outputPath = outputBasePath + "/" + timestamp;
    LOGGER.info("outputPath=" + outputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
  } catch (IOException e) {
    LOGGER.error("run " + jobName + " failed", e);
    throw e;
  }
  return job;
}
public static Job createSubmittableJob(Configuration conf, String tableName, String outputPath)
    throws IOException {
  Validate.notEmpty(tableName, "tableName must not be empty");
  Validate.notEmpty(outputPath, "outputPath must not be empty");
  long timestamp = System.currentTimeMillis();
  Job job = null;
  String jobName = null;
  try {
    jobName = GetRandomRowsByRegions.class.getSimpleName() + "_" + timestamp;
    LOGGER.info("start to run job:" + jobName);
    job = new Job(conf, jobName);
    job.setJarByClass(GetRandomRowsByRegions.class);
    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, Mapper.class, Text.class,
        NullWritable.class, job, true, TableInputFormat.class);
    // map-only job
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
  } catch (IOException e) {
    LOGGER.error("run " + jobName + " failed", e);
    throw e;
  }
  return job;
}
@Override
public void setLocation(String location, Job job) throws IOException {
  job.getConfiguration().setBoolean("pig.noSplitCombination", true);
  m_conf = initialiseHBaseClassLoaderResources(job);
  String tablename = location;
  if (location.startsWith("hbase://")) {
    tablename = location.substring(8);
  }
  if (m_table == null) {
    m_table = new HTable(m_conf, tablename);
  }
  m_table.setScannerCaching(caching_);
  m_conf.set(TableInputFormat.INPUT_TABLE, tablename);
  String projectedFields = getUDFProperties().getProperty(projectedFieldsName());
  if (projectedFields != null) {
    // update columnInfo_
    pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
  }
  for (ColumnInfo columnInfo : columnInfo_) {
    // do we have a column family, or a column?
    if (columnInfo.isColumnMap()) {
      scan.addFamily(columnInfo.getColumnFamily());
    } else {
      scan.addColumn(columnInfo.getColumnFamily(), columnInfo.getColumnName());
    }
  }
  if (requiredFieldList != null) {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
        new String[] { contextSignature });
    p.setProperty(contextSignature + "_projectedFields",
        ObjectSerializer.serialize(requiredFieldList));
  }
  m_conf.set(TableInputFormat.SCAN, convertScanToString(scan));
}
/**
 * Allows subclasses to set the {@link Table}.
 *
 * @param table The table to get the data from.
 */
protected void setHTable(Table table) {
  if (table == null) {
    throw new IllegalArgumentException("Unexpected null value for 'table'.");
  }
  this.table = table;
  if (conf == null) {
    throw new RuntimeException("Unexpected null value for 'conf'");
  }
  conf.set(TableInputFormat.INPUT_TABLE, table.getName().getNameAsString());
}