/**
 * Verifies that selecting {@code CompressionMode.OTHER} together with an explicit
 * codec class name makes the target instantiate exactly that codec class.
 */
@Test
public void testCustomCompressionCodec() throws Exception {
  HdfsDTarget dTarget = new ForTestHdfsTarget();
  configure(dTarget);
  dTarget.hdfsTargetConfigBean.compression = CompressionMode.OTHER;
  dTarget.hdfsTargetConfigBean.otherCompression = DeflateCodec.class.getName();

  HdfsTarget hdfsTarget = (HdfsTarget) dTarget.createTarget();
  try {
    Target.Context targetContext =
        ContextInfoCreator.createTargetContext(HdfsDTarget.class, "n", false, OnRecordError.TO_ERROR, null);
    hdfsTarget.init(null, targetContext);
    // The configured class name must win over the built-in compression modes.
    Assert.assertEquals(DeflateCodec.class, hdfsTarget.getCompressionCodec().getClass());
  } finally {
    // Always release target resources, even when the assertion fails.
    hdfsTarget.destroy();
  }
}
/**
 * Runs INSERT OVERWRITE into a deflate-compressed table and verifies that every
 * file written under the table URI is recognized as {@link DeflateCodec}.
 *
 * <p>Fix: the {@code ResultSet} returned by the trailing DROP TABLE statement was
 * never closed (a resource leak); it is now closed, matching the sibling tests.
 */
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testInsertOverwriteWithCompression");
  ResultSet res = executeFile("testInsertOverwriteWithCompression_ddl.sql");
  res.close();

  CatalogService catalog = testingCluster.getMaster().getCatalog();
  assertTrue(catalog.existsTable(getCurrentDatabase(), tableName));

  res = executeQuery();
  res.close();

  TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), tableName);
  // Row statistics are not maintained when the Hive catalog store is running.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(2, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());
  for (FileStatus file : fs.listStatus(new Path(desc.getUri()))) {
    CompressionCodec codec = factory.getCodec(file.getPath());
    assertTrue(codec instanceof DeflateCodec);
  }

  // Close the DROP statement's ResultSet as well (was leaked before).
  executeString("DROP TABLE " + tableName + " PURGE").close();
}
/**
 * Checks INSERT OVERWRITE INTO LOCATION with compression: exactly one output file
 * must exist under the target path and it must decode as {@link DeflateCodec}.
 * Skipped entirely when the Hive catalog store is running.
 */
@Test
public final void testInsertOverwriteLocationWithCompression() throws Exception {
  if (testingCluster.isHiveCatalogStoreRunning()) {
    return; // not applicable under the Hive catalog store
  }
  ResultSet resultSet = executeQuery();
  resultSet.close();

  FileSystem fileSystem = FileSystem.get(testingCluster.getConfiguration());
  Path outputPath = new Path("/tajo-data/testInsertOverwriteLocationWithCompression");
  assertTrue(fileSystem.exists(outputPath));
  assertEquals(1, fileSystem.listStatus(outputPath).length);

  CompressionCodecFactory codecFactory = new CompressionCodecFactory(testingCluster.getConfiguration());
  for (FileStatus status : fileSystem.listStatus(outputPath)) {
    CompressionCodec detected = codecFactory.getCodec(status.getPath());
    assertTrue(detected instanceof DeflateCodec);
  }
}
/**
 * Creates a deflate-compressed CSV table, inserts the lineitem rows for order key 3,
 * and verifies row count plus the codec of every file written under the table path.
 */
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  String tableName = "testInsertOverwriteWithCompression";
  ResultSet resultSet = tpch.execute(
      "create table " + tableName
          + " (col1 int4, col2 int4, col3 float8) USING csv WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec')");
  resultSet.close();

  TajoTestingCluster cluster = tpch.getTestingCluster();
  CatalogService catalogService = cluster.getMaster().getCatalog();
  assertTrue(catalogService.existsTable(tableName));

  resultSet = tpch.execute(
      "insert overwrite into " + tableName
          + " select l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey = 3");
  resultSet.close();

  TableDesc tableDesc = catalogService.getTableDesc(tableName);
  assertEquals(2, tableDesc.getStats().getNumRows().intValue());

  FileSystem fileSystem = FileSystem.get(cluster.getConfiguration());
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(cluster.getConfiguration());
  for (FileStatus status : fileSystem.listStatus(tableDesc.getPath())) {
    CompressionCodec detected = codecFactory.getCodec(status.getPath());
    assertTrue(detected instanceof DeflateCodec);
  }
}
/**
 * Same as the basic compressed-insert test, but resolves the table through the
 * default database and skips the row-count check under the HCatalog store.
 */
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  String tableName = CatalogUtil.normalizeIdentifier("testInsertOverwriteWithCompression");
  ResultSet resultSet = tpch.execute(
      "create table " + tableName
          + " (col1 int4, col2 int4, col3 float8) USING csv WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec')");
  resultSet.close();

  TajoTestingCluster cluster = tpch.getTestingCluster();
  CatalogService catalogService = cluster.getMaster().getCatalog();
  assertTrue(catalogService.existsTable(DEFAULT_DATABASE_NAME, tableName));

  resultSet = tpch.execute(
      "insert overwrite into " + tableName
          + " select l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey = 3");
  resultSet.close();

  TableDesc tableDesc = catalogService.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are unavailable when the HCatalog store backs the catalog.
  if (!cluster.isHCatalogStoreRunning()) {
    assertEquals(2, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(cluster.getConfiguration());
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(cluster.getConfiguration());
  for (FileStatus status : fileSystem.listStatus(tableDesc.getPath())) {
    CompressionCodec detected = codecFactory.getCodec(status.getPath());
    assertTrue(detected instanceof DeflateCodec);
  }
}
/**
 * INSERT without a FROM clause: constant values are written into a compressed text
 * file at an explicit location, then decompressed and checked field by field.
 */
@Test
public final void testInsertOverwritePathWithNonFromQuery() throws Exception {
  ResultSet resultSet = executeString("insert overwrite into location "
      + "'/tajo-data/testInsertOverwritePathWithNonFromQuery' "
      + "USING text WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
      + "select 1::INT4, 2.1::FLOAT4, 'test'");
  resultSet.close();

  FileSystem fileSystem = FileSystem.get(testingCluster.getConfiguration());
  Path outputPath = new Path("/tajo-data/testInsertOverwritePathWithNonFromQuery");
  assertTrue(fileSystem.exists(outputPath));
  assertEquals(1, fileSystem.listStatus(outputPath).length);

  CompressionCodecFactory codecFactory = new CompressionCodecFactory(testingCluster.getConfiguration());
  FileStatus onlyFile = fileSystem.listStatus(outputPath)[0];
  CompressionCodec codec = codecFactory.getCodec(onlyFile.getPath());
  assertTrue(codec instanceof DeflateCodec);

  // Decode the file with the detected codec and verify the single row's fields.
  try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(codec.createInputStream(fileSystem.open(onlyFile.getPath()))))) {
    String line = reader.readLine();
    assertNotNull(line);
    String[] fields = line.split("\\|");
    assertEquals(3, fields.length);
    assertEquals("1", fields[0]);
    assertEquals("2.1", fields[1]);
    assertEquals("test", fields[2]);
  }
}
/**
 * Single-column partitioned, deflate-compressed CSV table: verifies partition
 * directories exist and every data file in every partition uses the codec.
 */
@Test
public final void testColumnPartitionedTableByOneColumnsWithCompression() throws Exception {
  String tableName = "testColumnPartitionedTableByOneColumnsWithCompression";
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col2 int4, col3 float8) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "PARTITION BY column(col1 int4)");
  resultSet.close();
  assertTrue(catalog.existsTable(tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName + " select l_partkey, l_quantity, l_orderkey from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(tableName);
  assertEquals(5, tableDesc.getStats().getNumRows().intValue());

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));

  for (FileStatus partitionDir : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(partitionDir.getPath()));
    for (FileStatus dataFile : fileSystem.listStatus(partitionDir.getPath())) {
      CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
      assertTrue(detected instanceof DeflateCodec);
    }
  }
}
/**
 * INSERT OVERWRITE INTO an explicit location with compression: a single deflate
 * file must be produced under the given path.
 */
@Test
public final void testInsertOverwriteLocationWithCompression() throws Exception {
  ResultSet resultSet = tpch.execute(
      "insert overwrite into location '/tajo-data/testInsertOverwriteLocationWithCompression' USING csv WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') select * from lineitem where l_orderkey = 3");
  resultSet.close();

  FileSystem fileSystem = FileSystem.get(tpch.getTestingCluster().getConfiguration());
  Path outputPath = new Path("/tajo-data/testInsertOverwriteLocationWithCompression");
  assertTrue(fileSystem.exists(outputPath));
  assertEquals(1, fileSystem.listStatus(outputPath).length);

  CompressionCodecFactory codecFactory =
      new CompressionCodecFactory(tpch.getTestingCluster().getConfiguration());
  for (FileStatus status : fileSystem.listStatus(outputPath)) {
    CompressionCodec detected = codecFactory.getCodec(status.getPath());
    assertTrue(detected instanceof DeflateCodec);
  }
}
/**
 * Single-column partitioned compressed table, default-database variant: checks
 * partition directories and that every data file uses {@link DeflateCodec}.
 */
@Test
public final void testColumnPartitionedTableByOneColumnsWithCompression() throws Exception {
  String tableName = CatalogUtil.normalizeIdentifier("testColumnPartitionedTableByOneColumnsWithCompression");
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col2 int4, col3 float8) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "PARTITION BY column(col1 int4)");
  resultSet.close();
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName + " select l_partkey, l_quantity, l_orderkey from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are not tracked under the HCatalog store.
  if (!testingCluster.isHCatalogStoreRunning()) {
    assertEquals(5, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));

  for (FileStatus partitionDir : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(partitionDir.getPath()));
    for (FileStatus dataFile : fileSystem.listStatus(partitionDir.getPath())) {
      CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
      assertTrue(detected instanceof DeflateCodec);
    }
  }
}
/**
 * Single-column partitioned, compressed text table, exercised either as
 * CREATE + INSERT OVERWRITE or as a single CTAS depending on {@code nodeType}.
 * Verifies stats, partition directories, per-file codec, catalog partition
 * metadata, and drops the table at the end.
 */
@Test
public final void testColumnPartitionedTableByOneColumnsWithCompression() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableByOneColumnsWithCompression");
  ResultSet resultSet;
  if (nodeType == NodeType.INSERT) {
    // Two-step path: create the partitioned table, then insert into it.
    resultSet = executeString(
        "create table " + tableName + " (col2 int4, col3 float8) USING text "
            + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "PARTITION BY column(col1 int4)");
    resultSet.close();
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
    resultSet = executeString(
        "insert overwrite into " + tableName + " select l_partkey, l_quantity, l_orderkey from lineitem");
  } else {
    // Single-step path: CTAS with the same schema and source query.
    resultSet = executeString(
        "create table " + tableName + " (col2 int4, col3 float8) USING text "
            + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "PARTITION BY column(col1 int4) as select l_partkey, l_quantity, l_orderkey from lineitem");
  }
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are not maintained when the Hive catalog store is running.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(new Path(tableDesc.getUri())));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = new Path(tableDesc.getUri());
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));

  for (FileStatus partitionDir : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(partitionDir.getPath()));
    for (FileStatus dataFile : fileSystem.listStatus(partitionDir.getPath())) {
      CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
      assertTrue(detected instanceof DeflateCodec);
    }
  }

  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1"},
      tableDesc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
/**
 * Two-column partitioned, compressed text table, exercised either as
 * CREATE + INSERT OVERWRITE or as CTAS depending on {@code nodeType}. Verifies
 * the two-level partition directory layout, per-file codec, catalog partition
 * metadata, and drops the table at the end.
 */
@Test
public final void testColumnPartitionedTableByTwoColumnsWithCompression() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableByTwoColumnsWithCompression");
  ResultSet resultSet;
  if (nodeType == NodeType.INSERT) {
    resultSet = executeString(
        "create table " + tableName + " (col3 float8, col4 text) USING text "
            + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "PARTITION by column(col1 int4, col2 int4)");
    resultSet.close();
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
    resultSet = executeString(
        "insert overwrite into " + tableName
            + " select l_quantity, l_returnflag, l_orderkey, l_partkey from lineitem");
  } else {
    resultSet = executeString(
        "create table " + tableName + " (col3 float8, col4 text) USING text "
            + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "PARTITION by column(col1 int4, col2 int4) as select l_quantity, l_returnflag, l_orderkey, "
            + "l_partkey from lineitem");
  }
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are not maintained when the Hive catalog store is running.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(new Path(tableDesc.getUri())));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = new Path(tableDesc.getUri());
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));

  // Walk the two-level partition tree and check each leaf file's codec.
  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus dataFile : fileSystem.listStatus(secondLevel.getPath())) {
        CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
        assertTrue(detected instanceof DeflateCodec);
      }
    }
  }

  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1", "col2"},
      tableDesc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
/**
 * Three-column partitioned, compressed text table (INSERT or CTAS by
 * {@code nodeType}). Verifies the three-level directory layout and per-file
 * codec, then queries a predicate that matches no partition and expects an
 * empty result. Drops the table at the end.
 */
@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableNoMatchedPartition");
  ResultSet resultSet;
  if (nodeType == NodeType.INSERT) {
    resultSet = executeString(
        "create table " + tableName + " (col4 text) USING text "
            + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "partition by column(col1 int4, col2 int4, col3 float8)");
    resultSet.close();
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
    resultSet = executeString(
        "insert overwrite into " + tableName
            + " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
  } else {
    resultSet = executeString(
        "create table " + tableName + " (col4 text) USING text "
            + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
            + "partition by column(col1 int4, col2 int4, col3 float8) as select l_returnflag , l_orderkey, l_partkey, "
            + "l_quantity from lineitem");
  }
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are not maintained when the Hive catalog store is running.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(new Path(tableDesc.getUri())));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = new Path(tableDesc.getUri());
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk the three-level partition tree and check each leaf file's codec.
  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus thirdLevel : fileSystem.listStatus(secondLevel.getPath())) {
        assertTrue(fileSystem.isDirectory(thirdLevel.getPath()));
        for (FileStatus dataFile : fileSystem.listStatus(thirdLevel.getPath())) {
          CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
          assertTrue(detected instanceof DeflateCodec);
        }
      }
    }
  }

  // col2 = 9 matches no partition; the result must be empty.
  resultSet = executeString("select * from " + tableName + " where col2 = 9");
  assertFalse(resultSet.next());
  resultSet.close();

  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1", "col2", "col3"},
      tableDesc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
/**
 * Two-column partitioned, deflate-compressed CSV table: verifies the two-level
 * partition directory layout and that each leaf data file uses the codec.
 */
@Test
public final void testColumnPartitionedTableByTwoColumnsWithCompression() throws Exception {
  String tableName = "testColumnPartitionedTableByTwoColumnsWithCompression";
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col3 float8, col4 text) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "PARTITION by column(col1 int4, col2 int4)");
  resultSet.close();
  assertTrue(catalog.existsTable(tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName
          + " select l_quantity, l_returnflag, l_orderkey, l_partkey from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(tableName);
  assertEquals(5, tableDesc.getStats().getNumRows().intValue());

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));

  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus dataFile : fileSystem.listStatus(secondLevel.getPath())) {
        CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
        assertTrue(detected instanceof DeflateCodec);
      }
    }
  }
}
/**
 * Three-column partitioned, deflate-compressed CSV table: verifies the
 * three-level directory layout and per-file codec, then runs two partition-
 * pruning queries and checks the returned rows against expected key pairs.
 */
@Test
public final void testColumnPartitionedTableByThreeColumnsWithCompression() throws Exception {
  String tableName = "testColumnPartitionedTableByThreeColumnsWithCompression";
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col4 text) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "partition by column(col1 int4, col2 int4, col3 float8)");
  resultSet.close();
  assertTrue(catalog.existsTable(tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName
          + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(tableName);
  assertEquals(5, tableDesc.getStats().getNumRows().intValue());

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk the three-level partition tree and check each leaf file's codec.
  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus thirdLevel : fileSystem.listStatus(secondLevel.getPath())) {
        assertTrue(fileSystem.isDirectory(thirdLevel.getPath()));
        for (FileStatus dataFile : fileSystem.listStatus(thirdLevel.getPath())) {
          CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
          assertTrue(detected instanceof DeflateCodec);
        }
      }
    }
  }

  // Query 1: col2 = 2 should return exactly the (3,2) and (2,2) partitions.
  resultSet = executeString("select * from " + tableName + " where col2 = 2");
  Map<Double, int[]> expectedByQuantity = Maps.newHashMap();
  expectedByQuantity.put(45.0d, new int[]{3, 2});
  expectedByQuantity.put(38.0d, new int[]{2, 2});
  int rowCount = 0;
  while (resultSet.next()) {
    // Column 4 (quantity) keys the expected (col1, col2) pair.
    int[] expected = expectedByQuantity.get(resultSet.getDouble(4));
    assertEquals(expected[0], resultSet.getInt(2));
    assertEquals(expected[1], resultSet.getInt(3));
    rowCount++;
  }
  resultSet.close();
  assertEquals(2, rowCount);

  // Query 2: (col1 in {2,3}) and col2 >= 2 should return three rows.
  Map<Double, int[]> expectedByQuantity2 = Maps.newHashMap();
  expectedByQuantity2.put(49.0d, new int[]{3, 3});
  expectedByQuantity2.put(45.0d, new int[]{3, 2});
  expectedByQuantity2.put(38.0d, new int[]{2, 2});
  resultSet = executeString("select * from " + tableName + " where (col1 = 2 or col1 = 3) and col2 >= 2");
  rowCount = 0;
  while (resultSet.next()) {
    int[] expected = expectedByQuantity2.get(resultSet.getDouble(4));
    assertEquals(expected[0], resultSet.getInt(2));
    assertEquals(expected[1], resultSet.getInt(3));
    rowCount++;
  }
  resultSet.close();
  assertEquals(3, rowCount);
}
/**
 * Three-column partitioned, deflate-compressed CSV table: verifies the
 * three-level directory layout and per-file codec, then queries a predicate
 * that matches no partition and expects an empty result.
 */
@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
  String tableName = "testColumnPartitionedTableNoMatchedPartition";
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col4 text) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "partition by column(col1 int4, col2 int4, col3 float8)");
  resultSet.close();
  assertTrue(catalog.existsTable(tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName
          + " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(tableName);
  assertEquals(5, tableDesc.getStats().getNumRows().intValue());

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk the three-level partition tree and check each leaf file's codec.
  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus thirdLevel : fileSystem.listStatus(secondLevel.getPath())) {
        assertTrue(fileSystem.isDirectory(thirdLevel.getPath()));
        for (FileStatus dataFile : fileSystem.listStatus(thirdLevel.getPath())) {
          CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
          assertTrue(detected instanceof DeflateCodec);
        }
      }
    }
  }

  // col2 = 9 matches no partition; the result must be empty.
  resultSet = executeString("select * from " + tableName + " where col2 = 9");
  assertFalse(resultSet.next());
  resultSet.close();
}
/**
 * Two-column partitioned, compressed CSV table, default-database variant:
 * verifies the two-level directory layout and that each leaf file uses the codec.
 */
@Test
public final void testColumnPartitionedTableByTwoColumnsWithCompression() throws Exception {
  String tableName = CatalogUtil.normalizeIdentifier("testColumnPartitionedTableByTwoColumnsWithCompression");
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col3 float8, col4 text) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "PARTITION by column(col1 int4, col2 int4)");
  resultSet.close();
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName
          + " select l_quantity, l_returnflag, l_orderkey, l_partkey from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are not tracked under the HCatalog store.
  if (!testingCluster.isHCatalogStoreRunning()) {
    assertEquals(5, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));

  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus dataFile : fileSystem.listStatus(secondLevel.getPath())) {
        CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
        assertTrue(detected instanceof DeflateCodec);
      }
    }
  }
}
/**
 * Three-column partitioned, compressed CSV table, default-database variant:
 * verifies the three-level directory layout and per-file codec, then queries a
 * predicate matching no partition and expects an empty result.
 */
@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
  String tableName = CatalogUtil.normalizeIdentifier("testColumnPartitionedTableNoMatchedPartition");
  ResultSet resultSet = executeString(
      "create table " + tableName + " (col4 text) USING csv "
          + "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') "
          + "partition by column(col1 int4, col2 int4, col3 float8)");
  resultSet.close();
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

  resultSet = executeString(
      "insert overwrite into " + tableName
          + " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
  resultSet.close();

  TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // Row statistics are not tracked under the HCatalog store.
  if (!testingCluster.isHCatalogStoreRunning()) {
    assertEquals(5, tableDesc.getStats().getNumRows().intValue());
  }

  FileSystem fileSystem = FileSystem.get(conf);
  assertTrue(fileSystem.exists(tableDesc.getPath()));
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

  Path tablePath = tableDesc.getPath();
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fileSystem.isDirectory(new Path(tablePath.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk the three-level partition tree and check each leaf file's codec.
  for (FileStatus firstLevel : fileSystem.listStatus(tablePath)) {
    assertTrue(fileSystem.isDirectory(firstLevel.getPath()));
    for (FileStatus secondLevel : fileSystem.listStatus(firstLevel.getPath())) {
      assertTrue(fileSystem.isDirectory(secondLevel.getPath()));
      for (FileStatus thirdLevel : fileSystem.listStatus(secondLevel.getPath())) {
        assertTrue(fileSystem.isDirectory(thirdLevel.getPath()));
        for (FileStatus dataFile : fileSystem.listStatus(thirdLevel.getPath())) {
          CompressionCodec detected = codecFactory.getCodec(dataFile.getPath());
          assertTrue(detected instanceof DeflateCodec);
        }
      }
    }
  }

  // col2 = 9 matches no partition; the result must be empty.
  resultSet = executeString("select * from " + tableName + " where col2 = 9");
  assertFalse(resultSet.next());
  resultSet.close();
}
/**
 * Selects a compression codec for the given configuration: {@link GzipCodec}
 * when the native zlib library is loaded, otherwise {@link DeflateCodec}.
 */
private static CompressionCodec getCodec(Configuration configuration) {
  return ZlibFactory.isNativeZlibLoaded(configuration) ? new GzipCodec() : new DeflateCodec();
}