public void run() throws Exception {
  String tableName = "contacts";
  Configuration config = HBaseConfiguration.create();
  Scan scan = new Scan();
  scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false);  // don't set to true for MR jobs
  config.set(TableInputFormat.SCAN, convertScanToString(scan));
  config.set(TableInputFormat.INPUT_TABLE, tableName);
  Job job = new Job(config, "index builder");
  job.setJarByClass(JobSubmitter.class);
  job.setMapperClass(IndexMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
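Many of these snippets pass the Scan through the job Configuration via a convertScanToString helper. A minimal sketch of that helper, modeled on the package-private TableMapReduceUtil.convertScanToString from protobuf-based HBase releases (the exact serialization varies by HBase version, so treat the details as an assumption):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;

// Sketch only: serialize a Scan so it can ride in the Configuration as a String.
private static String convertScanToString(Scan scan) throws IOException {
  ClientProtos.Scan proto = ProtobufUtil.toScan(scan); // protobuf form of the Scan
  return Base64.encodeBytes(proto.toByteArray());      // Base64 for safe String transport
}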
@Override
public JavaRDD<ObjectValue> evaluate(Transformation transformation) {
  final SparkTransformationEvaluator evaluator = new SparkTransformationEvaluator(transformation);
  JavaSparkContext sc = NotaQL.SparkFactory.getSparkContext();
  final Configuration conf = createConf();
  conf.set(TableInputFormat.INPUT_TABLE, tableId);
  final JavaPairRDD<ImmutableBytesWritable, Result> inputRDD = sc.newAPIHadoopRDD(
      conf, TableInputFormat.class, ImmutableBytesWritable.class,
      org.apache.hadoop.hbase.client.Result.class);
  // convert all rows in the RDD to the inner format
  final JavaRDD<Value> converted = inputRDD.map(t -> ValueConverter.convertToNotaQL(t._2));
  // filter out the rows that do not satisfy the input predicate
  final JavaRDD<Value> filtered = converted.filter(
      v -> transformation.satisfiesInPredicate((ObjectValue) v));
  // process all input
  return evaluator.process(filtered);
}
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
  TableInput tableInput = (TableInput) annotation;
  // Base setup of the table mapper job
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  try {
    // Add dependencies
    TableMapReduceUtil.addDependencyJars(job);
    String tableName = getTableName(tableInput);
    Scan scan = getScan(tableInput);
    job.setInputFormatClass(TableInputFormat.class);
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  } catch (IOException e) {
    throw new ToolException(e);
  }
}
@Test
public void testProcessDefaults() {
  try {
    Annotation annotation = setupDriver(new TableDriverDefaults());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo(TEST_INPUT));
    assertThat(conf.get(TableInputFormat.SCAN),
        equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test
public void testProcessExplicitTable() {
  try {
    Annotation annotation = setupDriver(new TableDriverExplicitTable());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("my_table"));
    assertThat(conf.get(TableInputFormat.SCAN),
        equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test
public void testProcessCustomScan() {
  try {
    Annotation annotation = setupDriver(new TableDriverWithScan());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.my_table"));
    assertThat(conf.get(TableInputFormat.SCAN),
        equalTo("AgAAAAAAAv//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
@Test
public void testConditionalName() {
  try {
    Annotation annotation = setupDriver(new TableDriverNameExpr());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("myTable"));
    TableDriverNameExpr.PREFIX = "test";
    handler.process(annotation, job, null);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.myTable"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
/**
 * Handles initializing this class with objects specific to it (i.e., the parser). Common
 * initialization that might be leveraged by a subclass is done in <code>doSetup</code>. Hence a
 * subclass may choose to override this method and call <code>doSetup</code> as well before
 * handling its own custom params.
 * @param context the mapper context
 */
@Override
protected void setup(Context context) throws IOException {
  doSetup(context);
  Configuration conf = context.getConfiguration();
  parser = new TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
  if (parser.getRowKeyColumnIndex() == -1) {
    throw new RuntimeException("No row key column specified");
  }
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  HTable hTable = null;
  try {
    hTable = new HTable(conf, tableName);
    this.startKeys = hTable.getStartKeys();
    byte[] indexBytes = hTable.getTableDescriptor().getValue(Constants.INDEX_SPEC_KEY);
    if (indexBytes != null) {
      TableIndices tableIndices = new TableIndices();
      tableIndices.readFields(indexBytes);
      this.indices = tableIndices.getIndices();
    }
  } finally {
    if (hTable != null) {
      hTable.close();
    }
  }
}
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  conf.set(TableInputFormat.INPUT_TABLE, table);
  conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
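For context, a rough usage sketch of the overload above; the table name, ExampleMapper, and the output classes are placeholders, not names from the original source:

// Hypothetical driver wiring for the initTableMapperJob overload above.
// ExampleMapper stands in for any TableMapper<ImmutableBytesWritable, Text>.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "example-table-scan");
Scan scan = new Scan();
scan.setCaching(500);        // fetch more rows per RPC; the default of 1 is slow for MR
scan.setCacheBlocks(false);  // avoid polluting the region server block cache
initTableMapperJob("exampleTable", scan, ExampleMapper.class,
    ImmutableBytesWritable.class, Text.class, job,
    true, TableInputFormat.class);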
private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
  Scan s = new Scan();
  // Set Scan Versions
  s.setMaxVersions(Integer.MAX_VALUE);
  s.setCacheBlocks(false);
  // Set Scan Column Family
  if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
    s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
  }
  // Set RowFilter or Prefix Filter if applicable.
  Filter rowFilter = getRowFilter(args);
  if (rowFilter != null) {
    LOG.info("Setting Row Filter for counter.");
    s.setFilter(rowFilter);
  }
  // Set TimeRange if defined
  long[] timeRange = getTimeRange(args);
  if (timeRange != null) {
    LOG.info("Setting TimeRange for counter.");
    s.setTimeRange(timeRange[0], timeRange[1]);
  }
  LOG.warn("Got the Scan: " + s);
  return s;
}
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or "
        + "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println(" Note: -D properties will be applied to the conf used. ");
    System.err.println(" Additionally, the following SCAN properties can be specified");
    System.err.println(" to get fine-grained control on what is counted.");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report "
        + "separator string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell "
        + "counter count operation to a limited subset of rows from the table based on regex or "
        + "prefix pattern.");
    return -1;
  }
  Job job = createSubmittableJob(getConf(), otherArgs);
  return (job.waitForCompletion(true) ? 0 : 1);
}
private static Configuration initializeHBaseConfig() {
  Configuration hbaseConfig = HBaseConfiguration.create();
  hbaseConfig.set(TableInputFormat.INPUT_TABLE, Consts.TARGET_TABLE);
  hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
  hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
  hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
  hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");
  // the standard key is hbase.cluster.distributed (original had "hbase.distributed.cluster", which is a no-op)
  hbaseConfig.set("hbase.cluster.distributed", "true");
  hbaseConfig.set("hbase.zookeeper.quorum", Consts.ZOOKKEEPER_QUORUM);
  hbaseConfig.set("mapreduce.job.maps", "4");
  hbaseConfig.set("mapred.map.tasks", "4");
  hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");
  return hbaseConfig;
}
private static Configuration initializeHBaseConfig() {
  Configuration hbaseConfig = HBaseConfiguration.create();
  hbaseConfig.set(TableInputFormat.INPUT_TABLE, TARGET_TABLE);
  hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
  hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
  hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
  hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");
  // the standard key is hbase.cluster.distributed (original had "hbase.distributed.cluster", which is a no-op)
  hbaseConfig.set("hbase.cluster.distributed", "true");
  hbaseConfig.set("hbase.zookeeper.quorum", ZOOKKEEPER_QUORUM);
  hbaseConfig.set("mapreduce.job.maps", "4");
  hbaseConfig.set("mapred.map.tasks", "4");
  hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");
  return hbaseConfig;
}
/**
 * Prepares a MapReduce job.
 * @param tn The current table name.
 * @param familyName The current family name.
 * @param scan The current scan.
 * @param conf The current configuration.
 * @return A MapReduce job.
 * @throws IOException if the job cannot be set up
 */
private Job prepareJob(TableName tn, String familyName, Scan scan, Configuration conf)
    throws IOException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(SweepMapper.class);
  TableMapReduceUtil.initTableMapperJob(tn.getNameAsString(), scan, SweepMapper.class,
      Text.class, Writable.class, job);
  job.setInputFormatClass(TableInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(KeyValue.class);
  job.setReducerClass(SweepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  String jobName = getCustomJobName(this.getClass().getSimpleName(), tn.getNameAsString(),
      familyName);
  job.setJobName(jobName);
  if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
    String fileLoc = conf.get(CREDENTIALS_LOCATION);
    Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
    job.getCredentials().addAll(cred);
  }
  return job;
}
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table The Splice table name to read from.
 * @param scan The scan instance with the columns, time range, etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *   carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param inputFormatClass The input format class to use.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends Mapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Object> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  if (mapper != null) {
    job.setMapperClass(mapper);
  }
  job.getConfiguration().set(MRConstants.SPLICE_INPUT_TABLE_NAME, table);
  job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
}
public SMRecordReaderImpl getRecordReader(InputSplit split, Configuration config)
    throws IOException, InterruptedException {
  config.addResource(conf);
  if (LOG.isDebugEnabled()) {
    SpliceLogUtils.debug(LOG, "getRecordReader with table=%s, inputTable=%s, conglomerate=%s",
        table, config.get(TableInputFormat.INPUT_TABLE),
        config.get(MRConstants.SPLICE_INPUT_CONGLOMERATE));
  }
  rr = new SMRecordReaderImpl(conf);
  if (table == null) {
    TableName tableInfo = TableName.valueOf(config.get(TableInputFormat.INPUT_TABLE));
    PartitionFactory tableFactory = SIDriver.driver().getTableFactory();
    table = ((ClientPartition) tableFactory.getTable(tableInfo)).unwrapDelegate();
  }
  rr.setHTable(table);
  if (LOG.isDebugEnabled()) {
    SpliceLogUtils.debug(LOG, "returning record reader");
  }
  return rr;
}
@Override
public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) {
  // a hack for MultiInputFormat to see that there is a child format
  FileInputFormat.setInputPaths(conf, getPath());
  if (quorumNames != null) {
    conf.set("hbase.zookeeper.quorum", quorumNames);
  }
  LOG.debug("sourcing from table: {}", tableName);
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  if (null != base64Scan) {
    conf.set(TableInputFormat.SCAN, base64Scan);
  }
  super.sourceConfInit(process, conf);
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.USERTABLE_COLUMN_RATING);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      RatingExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer
  job.setReducerClass(RatingExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.RECOMMENDATION_COLUMN);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      RecommendationsExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer (identity)
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.COLLECTION_TABLE_COLUMN_INTR);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      CollectionExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer
  job.setReducerClass(CollectionExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
@Override
public boolean execute() throws Exception {
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.USERTABLE_COLUMN_FOAF);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(),
      UserExportMap.class, ImmutableBytesWritable.class, Text.class, job);
  // reducer
  job.setReducerClass(UserExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
public static List<Put> getIndexPut(Put userPut, Configuration conf) throws IOException {
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  IndexedHTableDescriptor tableDescriptor = getTableDescriptor(tableName, conf);
  List<Put> indexPuts = new ArrayList<Put>();
  if (tableDescriptor != null) {
    List<IndexSpecification> indices = tableDescriptor.getIndices();
    for (IndexSpecification index : indices) {
      byte[] startkey = getStartKey(conf, tableName, userPut.getRow());
      Put indexPut = IndexUtils.prepareIndexPut(userPut, index, startkey);
      if (indexPut != null) {
        indexPuts.add(indexPut);
      }
    }
  }
  return indexPuts;
}
public static List<Delete> getIndexDelete(Delete userDelete, Configuration conf)
    throws IOException {
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  IndexedHTableDescriptor tableDescriptor = getTableDescriptor(tableName, conf);
  List<Delete> indexDeletes = new ArrayList<Delete>();
  if (tableDescriptor != null) {
    List<IndexSpecification> indices = tableDescriptor.getIndices();
    for (IndexSpecification index : indices) {
      byte[] startkey = getStartKey(conf, tableName, userDelete.getRow());
      Delete indexDelete = IndexUtils.prepareIndexDelete(userDelete, index, startkey);
      if (indexDelete != null) {
        indexDeletes.add(indexDelete);
      }
    }
  }
  return indexDeletes;
}
Job createSubmittableJob(final String[] args) throws IOException {
  Configuration configFromArgs = parseArguments(args);
  if (configFromArgs == null || sourceTableNameString == null) {
    return null;
  }
  getConf().addResource(configFromArgs);
  getConf().setBoolean(Repository.MAP_SPECULATIVE_CONF_KEY, true); // no redundant processing
  Job job = Job.getInstance(getConf(),
      getConf().get(Repository.JOB_NAME_CONF_KEY, sourceTableNameString));
  TableMapReduceUtil.addDependencyJars(job);
  Scan scan = new Scan();
  // note that the user can override scan row-caching by setting TableInputFormat.SCAN_CACHEDROWS
  scan.setCaching(getConf().getInt(TableInputFormat.SCAN_CACHEDROWS, 500));
  scan.setCacheBlocks(false); // should be false for MapReduce jobs
  if (!verboseReport && !reportType.equals(ReportType.VALUE)) {
    scan.setFilter(new KeyOnlyFilter(true));
  }
  if (includeAllCells) {
    scan.setMaxVersions();
  }
  if (sourceColFamily != null) {
    scan.addFamily(sourceColFamily);
  }
  TableMapReduceUtil.initTableMapperJob(sourceTableNameString, scan,
      ColumnInvalidityReportMapper.class,
      null, // mapper output key is null
      null, // mapper output value is null
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // no Mapper output, no Reducer
  return job;
}
Job createSubmittableJob(final String[] args) throws IOException {
  if (!parseArguments(args)) {
    return null;
  }
  getConf().setBoolean(Repository.MAP_SPECULATIVE_CONF_KEY, true); // no redundant processing
  getConf().set(Repository.TABLE_NAME_CONF_KEY, sourceTableNameString);
  Job job = Job.getInstance(getConf(),
      getConf().get(Repository.JOB_NAME_CONF_KEY, sourceTableNameString));
  TableMapReduceUtil.addDependencyJars(job);
  Scan scan = new Scan();
  // note that the user can override scan row-caching by setting TableInputFormat.SCAN_CACHEDROWS
  scan.setCaching(getConf().getInt(TableInputFormat.SCAN_CACHEDROWS, 500));
  scan.setCacheBlocks(false); // should be false for scanning in MapReduce jobs
  scan.setFilter(new KeyOnlyFilter(true));
  if (includeAllCells) {
    scan.setMaxVersions();
  }
  TableMapReduceUtil.initTableMapperJob(sourceTableNameString, scan,
      ColumnDiscoveryMapper.class,
      null, // mapper output key is null
      null, // mapper output value is null
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // no Mapper output, no Reducer
  return job;
}
@Override
public void setConf(final Configuration config) {
  super.setConf(config);
  //config.set(TableInputFormat.SCAN_COLUMN_FAMILY, Backend.EDGESTORE_NAME);
  config.set(TableInputFormat.INPUT_TABLE, inputConf.get(HBaseStoreManager.HBASE_TABLE));
  //config.set(HConstants.ZOOKEEPER_QUORUM, config.get(TITAN_HADOOP_GRAPH_INPUT_TITAN_STORAGE_HOSTNAME));
  config.set(HConstants.ZOOKEEPER_QUORUM,
      inputConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
  //if (basicConf.get(TITAN_HADOOP_GRAPH_INPUT_TITAN_STORAGE_PORT, null) != null)
  if (inputConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT,
        String.valueOf(inputConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
  }
  config.set("autotype", "none");
  log.debug("hbase.security.authentication={}", config.get("hbase.security.authentication"));
  Scan scanner = new Scan();
  // TODO the mapping is private in HBaseStoreManager and leaks here -- replace String
  // database/CF names with an enum where each value has both a short and long name
  if (inputConf.get(HBaseStoreManager.SHORT_CF_NAMES)) {
    scanner.addFamily("e".getBytes());
    edgestoreFamily = Bytes.toBytes("e");
  } else {
    scanner.addFamily(Backend.EDGESTORE_NAME.getBytes());
    edgestoreFamily = Bytes.toBytes(Backend.EDGESTORE_NAME);
  }
  //scanner.setFilter(getColumnFilter(titanSetup.inputSlice(this.vertexQuery)));
  scanner.setFilter(getColumnFilter(TitanHadoopSetupCommon.getDefaultSliceQuery()));
  // TODO (minor): should we set other options in
  // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html for optimization?
  // convertScanToString is package-private in TableMapReduceUtil, so invoke it reflectively
  Method converter;
  try {
    converter = TableMapReduceUtil.class.getDeclaredMethod("convertScanToString", Scan.class);
    converter.setAccessible(true);
    config.set(TableInputFormat.SCAN, (String) converter.invoke(null, scanner));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  this.tableInputFormat.setConf(config);
}
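If reflection into the package-private helper is undesirable, the same Base64 string can be produced directly with the protobuf conversion shown in the convertScanToString sketch near the top of this section, again assuming a protobuf-based HBase version:

// Hypothetical reflection-free alternative to the converter call above.
config.set(TableInputFormat.SCAN,
    Base64.encodeBytes(ProtobufUtil.toScan(scanner).toByteArray()));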
@Override
public InputFormat getInputFormat() {
  TableInputFormat inputFormat = new HBaseTableIFBuilder()
      .withLimit(limit_)
      .withGt(gt_)
      .withGte(gte_)
      .withLt(lt_)
      .withLte(lte_)
      .withConf(m_conf)
      .build();
  return inputFormat;
}
@Override
public void setLocation(String location, Job job) throws IOException {
  Properties udfProps = getUDFProperties();
  job.getConfiguration().setBoolean("pig.noSplitCombination", true);
  initialiseHBaseClassLoaderResources(job);
  m_conf = initializeLocalJobConfig(job);
  String delegationTokenSet = udfProps.getProperty(HBASE_TOKEN_SET);
  if (delegationTokenSet == null) {
    addHBaseDelegationToken(m_conf, job);
    udfProps.setProperty(HBASE_TOKEN_SET, "true");
  }
  String tablename = location;
  if (location.startsWith("hbase://")) {
    tablename = location.substring(8);
  }
  m_conf.set(TableInputFormat.INPUT_TABLE, tablename);
  String projectedFields = udfProps.getProperty(projectedFieldsName());
  if (projectedFields != null) {
    // update columnInfo_
    pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
  }
  addFiltersWithoutColumnPrefix(columnInfo_);
  if (requiredFieldList != null) {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
        new String[] { contextSignature });
    p.setProperty(contextSignature + "_projectedFields",
        ObjectSerializer.serialize(requiredFieldList));
  }
  m_conf.set(TableInputFormat.SCAN, convertScanToString(scan));
}
@Override
public InputFormat getInputFormat() {
  TableInputFormat inputFormat = new HBaseTableIFBuilder()
      .withLimit(limit_)
      .withGt(gt_)
      .withGte(gte_)
      .withLt(lt_)
      .withLte(lte_)
      .withConf(m_conf)
      .build();
  inputFormat.setScan(scan);
  return inputFormat;
}
@Override
public void setLocation(String location, Job job) throws IOException {
  Properties udfProps = getUDFProperties();
  job.getConfiguration().setBoolean("pig.noSplitCombination", true);
  m_conf = initializeLocalJobConfig(job);
  String delegationTokenSet = udfProps.getProperty(HBASE_TOKEN_SET);
  if (delegationTokenSet == null) {
    addHBaseDelegationToken(m_conf, job);
    udfProps.setProperty(HBASE_TOKEN_SET, "true");
  }
  String tablename = location;
  if (location.startsWith("hbase://")) {
    tablename = location.substring(8);
  }
  m_conf.set(TableInputFormat.INPUT_TABLE, tablename);
  String projectedFields = udfProps.getProperty(projectedFieldsName());
  if (projectedFields != null) {
    // update columnInfo_
    pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
  }
  addFiltersWithoutColumnPrefix(columnInfo_);
  if (requiredFieldList != null) {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
        new String[] { contextSignature });
    p.setProperty(contextSignature + "_projectedFields",
        ObjectSerializer.serialize(requiredFieldList));
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  this.conf = context.getConfiguration();
  this.fs = FileSystem.get(conf);
  // the MOB_COMPACTION_DELAY is ONE_DAY by default. Its value is only changed when testing.
  mobCompactionDelay = conf.getLong(SweepJob.MOB_COMPACTION_DELAY, SweepJob.ONE_DAY);
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  String familyName = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY);
  this.familyDir = MobUtils.getMobFamilyPath(conf, TableName.valueOf(tableName), familyName);
  HBaseAdmin admin = new HBaseAdmin(this.conf);
  try {
    family = admin.getTableDescriptor(Bytes.toBytes(tableName)).getFamily(
        Bytes.toBytes(familyName));
  } finally {
    try {
      admin.close();
    } catch (IOException e) {
      LOG.warn("Fail to close the HBaseAdmin", e);
    }
  }
  // disable the block cache (this must be setFloat, not getFloat, for the tiny size to take effect)
  Configuration copyOfConf = new Configuration(conf);
  copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.00001f);
  this.cacheConfig = new CacheConfig(copyOfConf);
  table = new HTable(this.conf, Bytes.toBytes(tableName));
  table.setAutoFlush(false, false);
  table.setWriteBufferSize(1 * 1024 * 1024); // 1MB
  memstore = new MemStoreWrapper(context, fs, table, family, new MemStore(), cacheConfig);
  // The start time of the sweep tool.
  // Only the mob files whose creation time is older than startTime-oneDay will be handled by the
  // reducer since it brings inconsistency to handle the latest mob files.
  this.compactionBegin = conf.getLong(MobConstants.MOB_COMPACTION_START_DATE, 0);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  mappersForOneRegion = conf.getInt(MAPPERS_FOR_ONE_REGION, mappersForOneRegion);
  String vertexTableName = conf.get(TableInputFormat.INPUT_TABLE);
  if (null == vertexTableName || "".equals(vertexTableName)) {
    throw new IllegalArgumentException(TableInputFormat.INPUT_TABLE
        + " shall not be empty or null");
  }
  vertexTable = new HTable(conf, vertexTableName);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  bypassKeys = conf.getInt(BY_PASS_KEYS, bypassKeys);
  String vertexTableName = conf.get(TableInputFormat.INPUT_TABLE);
  if (null == vertexTableName || "".equals(vertexTableName)) {
    throw new IllegalArgumentException(TableInputFormat.INPUT_TABLE
        + " shall not be empty or null");
  }
  vertexTable = new HTable(conf, vertexTableName);
}
public static Job createSubmittableJob(Configuration conf, String tableName, String outputPath)
    throws IOException {
  Validate.notEmpty(tableName, "tableName must not be empty");
  Validate.notEmpty(outputPath, "outputPath must not be empty");
  long timestamp = System.currentTimeMillis();
  Job job = null;
  String jobName = null;
  try {
    jobName = "GetNoColumnsRows_" + timestamp;
    LOGGER.info("start to run job:" + jobName);
    job = new Job(conf, jobName);
    job.setJarByClass(GetNoColumnsRows.class);
    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, Mapper.class, Text.class,
        NullWritable.class, job, true, TableInputFormat.class);
    // map-only job
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
  } catch (IOException e) {
    LOGGER.error("run " + jobName + " failed", e);
    throw e;
  }
  return job;
}
private static Job createInitialPageRankJob(Configuration conf, String outputBasePath,
    Class<? extends TableInputFormat> tableInputFormat) throws IOException {
  String tableName = conf.get(HBaseGraphConstants.HBASE_GRAPH_TABLE_VERTEX_NAME_KEY);
  long timestamp = System.currentTimeMillis();
  Job job = null;
  String jobName = null;
  try {
    jobName = "CalculateInitPageRank_" + timestamp;
    LOGGER.info("start to run job:" + jobName);
    job = new Job(conf, jobName);
    job.setJarByClass(Driver.class);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CalculateInitPageRankMapper.class,
        Text.class, DoubleWritable.class, job, true, tableInputFormat);
    job.setReducerClass(CalculatePageRankReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    String outputPath = outputBasePath + "/" + timestamp;
    LOGGER.info("outputPath=" + outputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
  } catch (IOException e) {
    LOGGER.error("run " + jobName + " failed", e);
    throw e;
  }
  return job;
}
public static Job createSubmittableJob(Configuration conf, String tableName, String outputPath)
    throws IOException {
  Validate.notEmpty(tableName, "tableName must not be empty");
  Validate.notEmpty(outputPath, "outputPath must not be empty");
  long timestamp = System.currentTimeMillis();
  Job job = null;
  String jobName = null;
  try {
    jobName = GetRandomRowsByRegions.class.getSimpleName() + "_" + timestamp;
    LOGGER.info("start to run job:" + jobName);
    job = new Job(conf, jobName);
    job.setJarByClass(GetRandomRowsByRegions.class);
    LOGGER.info("tableName=" + tableName);
    LOGGER.info("outputPath=" + outputPath);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, Mapper.class, Text.class,
        NullWritable.class, job, true, TableInputFormat.class);
    // map-only job
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
  } catch (IOException e) {
    LOGGER.error("run " + jobName + " failed", e);
    throw e;
  }
  return job;
}
@Override
public void setLocation(String location, Job job) throws IOException {
  job.getConfiguration().setBoolean("pig.noSplitCombination", true);
  m_conf = initialiseHBaseClassLoaderResources(job);
  String tablename = location;
  if (location.startsWith("hbase://")) {
    tablename = location.substring(8);
  }
  if (m_table == null) {
    m_table = new HTable(m_conf, tablename);
  }
  m_table.setScannerCaching(caching_);
  m_conf.set(TableInputFormat.INPUT_TABLE, tablename);
  String projectedFields = getUDFProperties().getProperty(projectedFieldsName());
  if (projectedFields != null) {
    // update columnInfo_
    pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
  }
  for (ColumnInfo columnInfo : columnInfo_) {
    // do we have a column family, or a column?
    if (columnInfo.isColumnMap()) {
      scan.addFamily(columnInfo.getColumnFamily());
    } else {
      scan.addColumn(columnInfo.getColumnFamily(), columnInfo.getColumnName());
    }
  }
  if (requiredFieldList != null) {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
        new String[] { contextSignature });
    p.setProperty(contextSignature + "_projectedFields",
        ObjectSerializer.serialize(requiredFieldList));
  }
  m_conf.set(TableInputFormat.SCAN, convertScanToString(scan));
}
/**
 * Allows subclasses to set the {@link Table}.
 *
 * @param table The table to get the data from.
 */
protected void setHTable(Table table) {
  if (table == null) {
    throw new IllegalArgumentException("Unexpected null value for 'table'.");
  }
  this.table = table;
  if (conf == null) {
    throw new RuntimeException("Unexpected null value for 'conf'");
  }
  conf.set(TableInputFormat.INPUT_TABLE, table.getName().getNameAsString());
}