private void createTable() throws Exception {
  deleteTable();
  LOG.info("Creating table");
  Configuration conf = util.getConfiguration();
  String encodingKey = String.format(ENCODING_KEY, this.getClass().getSimpleName());
  DataBlockEncoding blockEncoding = DataBlockEncoding.valueOf(conf.get(encodingKey, "FAST_DIFF"));
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  for (byte[] cf : dataGen.getColumnFamilies()) {
    HColumnDescriptor hcd = new HColumnDescriptor(cf);
    hcd.setDataBlockEncoding(blockEncoding);
    htd.addFamily(hcd);
  }
  int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
  byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
  util.getHBaseAdmin().createTable(htd, splits);
  LOG.info("Created table");
}
@Override
public void run() {
  long startTime, endTime;
  HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
  desc.addFamily(new HColumnDescriptor(COLUMN_NAME));
  SplitAlgorithm algo = new RegionSplitter.HexStringSplit();
  byte[][] splits = algo.split(REGION_COUNT);
  LOG.info(String.format("Creating table %s with %d splits.", TABLE_NAME, REGION_COUNT));
  startTime = System.currentTimeMillis();
  try {
    admin.createTable(desc, splits);
    endTime = System.currentTimeMillis();
    success = true;
    LOG.info(String.format("Pre-split table created successfully in %dms.",
        (endTime - startTime)));
  } catch (IOException e) {
    LOG.error("Failed to create table", e);
  } finally {
    doneSignal.countDown();
  }
}
@Override
public void run() {
  long startTime, endTime;
  HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(TABLE_NAME));
  desc.addFamily(new HColumnDescriptor(COLUMN_NAME));
  SplitAlgorithm algo = new RegionSplitter.HexStringSplit();
  byte[][] splits = algo.split(REGION_COUNT);
  LOG.info(String.format("Creating table %s with %d splits.", TABLE_NAME, REGION_COUNT));
  startTime = System.currentTimeMillis();
  try {
    admin.createTable(desc, splits);
    endTime = System.currentTimeMillis();
    success = true;
    LOG.info(String.format("Pre-split table created successfully in %dms.",
        (endTime - startTime)));
  } catch (IOException e) {
    LOG.error("Failed to create table", e);
  } finally {
    doneSignal.countDown();
  }
}
private void createWriteTable(int numberOfServers) throws IOException {
  int numberOfRegions = (int) (numberOfServers * regionsLowerLimit);
  LOG.info("Number of live regionservers: " + numberOfServers + ", "
      + "pre-splitting the canary table into " + numberOfRegions + " regions "
      + "(current lower limit of regions per server is " + regionsLowerLimit
      + " and you can change it by config: "
      + HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY + " )");
  HTableDescriptor desc = new HTableDescriptor(writeTableName);
  HColumnDescriptor family = new HColumnDescriptor(CANARY_TABLE_FAMILY_NAME);
  family.setMaxVersions(1);
  family.setTimeToLive(writeDataTTL);
  desc.addFamily(family);
  byte[][] splits = new RegionSplitter.HexStringSplit().split(numberOfRegions);
  admin.createTable(desc, splits);
}
/**
 * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
 * @param conf the job configuration to modify
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir a temporary directory to restore the snapshot into. The current user should
 *   have write permissions to this directory, and it should not be a subdirectory of rootdir.
 *   After the job is finished, restoreDir can be deleted.
 * @param splitAlgo the SplitAlgorithm to use when generating InputSplits
 * @param numSplitsPerRegion how many input splits to generate per region
 * @throws IOException if an error occurs
 */
public static void setInput(Configuration conf, String snapshotName, Path restoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  conf.set(SNAPSHOT_NAME_KEY, snapshotName);
  if (numSplitsPerRegion < 1) {
    throw new IllegalArgumentException("numSplitsPerRegion must be >= 1, "
        + "illegal value: " + numSplitsPerRegion);
  }
  if (splitAlgo == null && numSplitsPerRegion > 1) {
    throw new IllegalArgumentException("Split algo can't be null when numSplitsPerRegion > 1");
  }
  if (splitAlgo != null) {
    conf.set(SPLIT_ALGO, splitAlgo.getClass().getName());
  }
  conf.setInt(NUM_SPLITS_PER_REGION, numSplitsPerRegion);
  Path rootDir = FSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
  // TODO: restore from record readers to parallelize.
  RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
  conf.set(RESTORE_DIR_KEY, restoreDir.toString());
}
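// A minimal usage sketch for setInput above, assuming a caller in the same class.
// The snapshot name and restore path are illustrative assumptions, not values
// from the source; the splitter and per-region split count are one valid choice.
static void configureSnapshotInputExample() throws IOException {
  Configuration conf = HBaseConfiguration.create();
  Path restoreDir = new Path("/tmp/snapshot-restore"); // hypothetical scratch dir, outside rootdir
  // Ask for two input splits per region, carved with the uniform byte-space splitter.
  setInput(conf, "mySnapshot", restoreDir, new RegionSplitter.UniformSplit(), 2);
}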
private void createTable() throws Exception {
  deleteTable();
  LOG.info("Creating table");
  Configuration conf = util.getConfiguration();
  String encodingKey = String.format(ENCODING_KEY, this.getClass().getSimpleName());
  DataBlockEncoding blockEncoding = DataBlockEncoding.valueOf(conf.get(encodingKey, "FAST_DIFF"));
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  for (byte[] cf : dataGen.getColumnFamilies()) {
    HColumnDescriptor hcd = new HColumnDescriptor(cf);
    hcd.setDataBlockEncoding(blockEncoding);
    htd.addFamily(hcd);
  }
  int serverCount = util.getHBaseClusterInterface().getClusterMetrics()
      .getLiveServerMetrics().size();
  byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
  util.getAdmin().createTable(htd, splits);
  LOG.info("Created table");
}
@Test
public void testCreateTableWithRegions() throws Exception {
  HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
  desc.addFamily(new HColumnDescriptor("cf"));
  SplitAlgorithm algo = new RegionSplitter.HexStringSplit();
  byte[][] splits = algo.split(REGION_COUNT);
  LOG.info(String.format("Creating table %s with %d splits.", TABLE_NAME, REGION_COUNT));
  long startTime = System.currentTimeMillis();
  try {
    admin.createTable(desc, splits);
    LOG.info(String.format("Pre-split table created successfully in %dms.",
        (System.currentTimeMillis() - startTime)));
  } catch (IOException e) {
    LOG.error("Failed to create table", e);
  }
}
protected void createTable(HTableDescriptor htd) throws Exception {
  deleteTable();
  if (util.getHBaseClusterInterface() instanceof MiniHBaseCluster) {
    LOG.warn("Test does not make a lot of sense for minicluster. Will set flush size low.");
    htd.setConfiguration(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, "1048576");
  }
  byte[][] splits = new RegionSplitter.HexStringSplit().split(
      util.getHBaseClusterInterface().getClusterStatus().getServersSize());
  util.getHBaseAdmin().createTable(htd, splits);
}
public void createMirroredTableIfNotExists(String hbaseTableName, Integer versions) {
  try {
    if (!DRY_RUN) {
      if (connection == null) {
        connection = ConnectionFactory.createConnection(hbaseConf);
      }
      Admin admin = connection.getAdmin();
      TableName tableName = TableName.valueOf(hbaseTableName);
      if (!admin.tableExists(tableName)) {
        LOGGER.info("table " + hbaseTableName + " does not exist in HBase. Creating...");
        HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
        HColumnDescriptor cd = new HColumnDescriptor("d");
        cd.setMaxVersions(versions);
        tableDescriptor.addFamily(cd);
        // pre-split into MIRRORED_TABLE_DEFAULT_REGIONS regions
        RegionSplitter.HexStringSplit splitter = new RegionSplitter.HexStringSplit();
        byte[][] splitKeys = splitter.split(MIRRORED_TABLE_DEFAULT_REGIONS);
        admin.createTable(tableDescriptor, splitKeys);
      }
      knownHBaseTables.put(hbaseTableName, 1);
    }
  } catch (IOException e) {
    LOGGER.error("Failed to create table in HBase.", e);
    // TODO: wait and retry if failed. After a while set status of applier
    // to 'blocked' & handle by overseer by stopping the replicator
  }
}
@Test
public void testSplit10_10() throws Exception {
  int numRegions = 10;
  int cardinality = 10;
  RegionSplitter.SplitAlgorithm splitAlgorithm = new DecimalStringSplit(cardinality);
  byte[][] splits = splitAlgorithm.split(numRegions);
  assertEquals(numRegions - 1, splits.length);
  int digits = 2;
  assertEquals(String.format("%0" + digits + "d", 1), Bytes.toString(splits[0]));
  assertEquals(String.format("%0" + digits + "d", 9), Bytes.toString(splits[numRegions - 2]));
}
@Test
public void testSplit3_10() throws Exception {
  int numRegions = 3;
  int cardinality = 10;
  RegionSplitter.SplitAlgorithm splitAlgorithm = new DecimalStringSplit(cardinality);
  byte[][] splits = splitAlgorithm.split(numRegions);
  assertEquals(numRegions - 1, splits.length);
  int digits = 2;
  assertEquals(String.format("%0" + digits + "d", 3), Bytes.toString(splits[0]));
  assertEquals(String.format("%0" + digits + "d", 6), Bytes.toString(splits[numRegions - 2]));
}
@Test
public void testSplit300_1000() throws Exception {
  int numRegions = 300;
  int cardinality = 1000;
  RegionSplitter.SplitAlgorithm splitAlgorithm = new DecimalStringSplit(cardinality);
  byte[][] splits = splitAlgorithm.split(numRegions);
  assertEquals(numRegions - 1, splits.length);
  int digits = 4;
  assertEquals(String.format("%0" + digits + "d", 3), Bytes.toString(splits[0]));
  assertEquals(String.format("%0" + digits + "d", 6), Bytes.toString(splits[1]));
  assertEquals(String.format("%0" + digits + "d", 10), Bytes.toString(splits[2]));
  assertEquals(String.format("%0" + digits + "d", 996), Bytes.toString(splits[numRegions - 2]));
}
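// The three tests above pin down the boundary math: the i-th split point is
// floor(i * cardinality / numRegions), zero-padded to the decimal width of
// cardinality ("10" -> 2 digits, "1000" -> 4 digits). A hypothetical helper
// that reproduces exactly the values asserted above (a sketch inferred from
// the tests, not the actual DecimalStringSplit implementation):
static byte[][] decimalSplitPoints(int numRegions, long cardinality) {
  int digits = String.valueOf(cardinality).length();
  byte[][] splits = new byte[numRegions - 1][];
  for (int i = 1; i < numRegions; i++) {
    long boundary = i * cardinality / numRegions; // integer division, matches the tests
    splits[i - 1] = Bytes.toBytes(String.format("%0" + digits + "d", boundary));
  }
  return splits;
}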
/**
 * Creates a pre-split table for load testing. If the table already exists,
 * logs a warning and continues.
 * @return the number of regions the table was split into
 */
public static int createPreSplitLoadTestTable(Configuration conf, TableDescriptor desc,
    ColumnFamilyDescriptor[] hcds, int numRegionsPerServer) throws IOException {
  return createPreSplitLoadTestTable(conf, desc, hcds, new RegionSplitter.HexStringSplit(),
      numRegionsPerServer);
}
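// A hypothetical call site for the overload above, assuming a caller in the
// same class. The table name, family name, and region count are illustrative;
// the descriptors are built with the standard HBase 2.x builder API.
static void preSplitLoadTestExample() throws IOException {
  Configuration conf = HBaseConfiguration.create();
  TableDescriptor desc =
      TableDescriptorBuilder.newBuilder(TableName.valueOf("load_test")).build();
  ColumnFamilyDescriptor[] families = { ColumnFamilyDescriptorBuilder.of("cf") };
  // Hex-string pre-split, 5 regions per live regionserver; returns the total region count.
  int regions = createPreSplitLoadTestTable(conf, desc, families, 5);
}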
@Override
protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int numSplitsPerRegion, int expectedNumSplits, boolean setLocalityEnabledTo)
    throws Exception {
  setupCluster();
  final TableName tableName = TableName.valueOf(name.getMethodName());
  try {
    createTableAndSnapshot(util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    JobConf job = new JobConf(util.getConfiguration());
    // setLocalityEnabledTo is ignored no matter what is specified, so as to test the case that
    // SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY is not explicitly specified
    // and the default value is taken.
    Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);

    if (numSplitsPerRegion > 1) {
      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
          TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class,
          job, false, tmpTableDir, new RegionSplitter.UniformSplit(), numSplitsPerRegion);
    } else {
      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
          TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class,
          job, false, tmpTableDir);
    }

    // mapred doesn't support start and end keys? o.O
    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
  } finally {
    util.getAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
@Override
public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int numSplitsPerRegion, int expectedNumSplits, boolean setLocalityEnabledTo)
    throws Exception {
  setupCluster();
  final TableName tableName = TableName.valueOf(name.getMethodName());
  try {
    createTableAndSnapshot(util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    Configuration conf = util.getConfiguration();
    conf.setBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY, setLocalityEnabledTo);
    Job job = new Job(conf);
    Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
    Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan

    if (numSplitsPerRegion > 1) {
      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
          TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class,
          job, false, tmpTableDir, new RegionSplitter.UniformSplit(), numSplitsPerRegion);
    } else {
      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
          TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class,
          job, false, tmpTableDir);
    }

    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
  } finally {
    util.getAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
protected void createTable(HTableDescriptor htd) throws Exception {
  deleteTable();
  if (util.getHBaseClusterInterface() instanceof MiniHBaseCluster) {
    LOG.warn("Test does not make a lot of sense for minicluster. Will set flush size low.");
    htd.setConfiguration(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, "1048576");
  }
  byte[][] splits = new RegionSplitter.HexStringSplit().split(
      util.getHBaseClusterInterface().getClusterMetrics().getLiveServerMetrics().size());
  util.getAdmin().createTable(htd, splits);
}
private byte[][] calculateRegionSplits(MongoURI uri, String tableName) throws Exception {
  DBCollection collection = uri.connectDB().getCollection(uri.getCollection());
  long size = collection.getStats().getLong("size");
  long regionSize = ConfigUtil.getPresplitTableRegionSize(_conf);
  // One region per regionSize bytes of source data, capped at 4096 regions
  // (e.g. a 10 GB collection with a 1 GB region size yields 11 regions).
  int numRegions = (int) Math.min((size / regionSize) + 1, 4096);
  if (numRegions > 1) {
    log.info("Pre-splitting " + tableName + " into " + numRegions + " regions");
    RegionSplitter.UniformSplit splitter = new RegionSplitter.UniformSplit();
    return splitter.split(numRegions);
  } else {
    log.info("Not splitting " + tableName + ", because the data can fit into a single region");
    return new byte[0][0];
  }
}
private void createTable() throws Exception {
  deleteTable();
  LOG.info("Creating table");
  HTableDescriptor htd = new HTableDescriptor(Bytes.toBytes(TABLE_NAME));
  for (byte[] cf : dataGen.getColumnFamilies()) {
    htd.addFamily(new HColumnDescriptor(cf));
  }
  int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
  byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
  util.getHBaseAdmin().createTable(htd, splits);
  LOG.info("Created table");
}
private byte[][] getSplits(int numRegions) {
  // Restrict UniformSplit to the non-negative 8-byte long key space
  // [0, Long.MAX_VALUE] instead of the full byte[] range it covers by default.
  RegionSplitter.UniformSplit split = new RegionSplitter.UniformSplit();
  split.setFirstRow(Bytes.toBytes(0L));
  split.setLastRow(Bytes.toBytes(Long.MAX_VALUE));
  return split.split(numRegions);
}
protected void createSchema() throws IOException {
  Configuration conf = getConf();
  TableName tableName = getTableName(conf);
  try (Connection conn = ConnectionFactory.createConnection(conf);
      Admin admin = conn.getAdmin()) {
    if (!admin.tableExists(tableName)) {
      HTableDescriptor htd = new HTableDescriptor(getTableName(getConf()));
      htd.addFamily(new HColumnDescriptor(FAMILY_NAME));
      // Always add these families. Just skip writing to them when we do not test per CF flush.
      htd.addFamily(new HColumnDescriptor(BIG_FAMILY_NAME));
      htd.addFamily(new HColumnDescriptor(TINY_FAMILY_NAME));
      // If we want to pre-split, compute how many splits.
      if (conf.getBoolean(HBaseTestingUtility.PRESPLIT_TEST_TABLE_KEY,
          HBaseTestingUtility.PRESPLIT_TEST_TABLE)) {
        int numberOfServers = admin.getClusterStatus().getServers().size();
        if (numberOfServers == 0) {
          throw new IllegalStateException("No live regionservers");
        }
        int regionsPerServer = conf.getInt(HBaseTestingUtility.REGIONS_PER_SERVER_KEY,
            HBaseTestingUtility.DEFAULT_REGIONS_PER_SERVER);
        int totalNumberOfRegions = numberOfServers * regionsPerServer;
        LOG.info("Number of live regionservers: " + numberOfServers + ", "
            + "pre-splitting table into " + totalNumberOfRegions + " regions "
            + "(default regions per server: " + regionsPerServer + ")");
        byte[][] splits = new RegionSplitter.UniformSplit().split(totalNumberOfRegions);
        admin.createTable(htd, splits);
      } else {
        // Looks like we're just letting things play out.
        // Create a table with one region by default.
        // This will make the splitting work hard.
        admin.createTable(htd);
      }
    }
  } catch (MasterNotRunningException e) {
    LOG.error("Master not running", e);
    throw new IOException(e);
  }
}
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  return new RegionSplitter.HexStringSplit();
}
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  return new DecimalStringSplit(cardinality);
}