import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo;
import org.junit.Test;

import com.google.common.collect.HashMultiset;

@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;

  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
  int hostCountBase = 0;

  // Generate the block list with replication 3 per block. Each block is
  // placed on three consecutive hosts (mod numNodes), and the starting host
  // rotates by one for every block, so replicas spread evenly across nodes.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes =
      new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks =
      new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);

  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  // 60 blocks of 100 bytes each at 200 bytes per split -> 30 splits.
  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure at least 90% of the splits are node-local (a single location);
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
@Test
public void testNodeInputSplit() throws IOException, InterruptedException {
  // Regression test for MAPREDUCE-4892. There are two nodes, and every
  // block is replicated on both of them. The grouping ensures that both
  // nodes get splits instead of just the first node.
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 12;
  long totLength = 0;
  long blockSize = 100;
  long maxSize = 200;
  long minSizeNode = 50;
  long minSizeRack = 50;
  String[] locations = { "h1", "h2" };
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
  for (int i = 0; i < numBlocks; ++i) {
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, locations,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes =
      new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  HashMap<String, Set<OneBlockInfo>> nodeToBlocks =
      new HashMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);

  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSize, minSizeNode, minSizeRack, splits);

  // 12 blocks of 100 bytes each at 200 bytes per split -> 6 splits.
  int expectedSplitCount = (int) (totLength / maxSize);
  assertEquals(expectedSplitCount, splits.size());

  // With an even distribution, each of the two nodes should get 3 splits.
  HashMultiset<String> nodeSplits = HashMultiset.create();
  for (int i = 0; i < expectedSplitCount; ++i) {
    InputSplit inSplit = splits.get(i);
    assertEquals(maxSize, inSplit.getLength());
    assertEquals(1, inSplit.getLocations().length);
    nodeSplits.add(inSplit.getLocations()[0]);
  }
  assertEquals(3, nodeSplits.count(locations[0]));
  assertEquals(3, nodeSplits.count(locations[1]));
}
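// Both tests above build the same four bookkeeping maps and drive
// populateBlockInfo()/createSplits() identically. Below is a minimal sketch
// of a shared helper that could absorb that duplication. The helper name
// createSplitsForBlocks() is hypothetical (not part of the Hadoop test
// class); it assumes the populateBlockInfo()/createSplits() signatures
// exactly as used above, and standardizes on a TreeMap for nodeToBlocks,
// as testNodeDistribution() does, so node iteration order is deterministic.
private List<InputSplit> createSplitsForBlocks(DummyInputFormat inFormat,
    OneBlockInfo[] blocks, long totLength, long maxSize, long minSizeNode,
    long minSizeRack) throws IOException, InterruptedException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes =
      new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  // TreeMap keeps the node-to-blocks iteration order deterministic.
  Map<String, Set<OneBlockInfo>> nodeToBlocks =
      new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);
  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSize, minSizeNode, minSizeRack, splits);
  return splits;
}

// With this helper, each test would shrink to building its block array and
// then calling, for example:
//   List<InputSplit> splits = createSplitsForBlocks(inFormat, blocks,
//       totLength, maxSize, minSizeNode, minSizeRack);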