@JsonCreator
public HDFSTableLayoutHandle(
        @JsonProperty("table") HDFSTableHandle table,
        @JsonProperty("fiberColumn") HDFSColumnHandle fiberColumn,
        @JsonProperty("timestampColumn") HDFSColumnHandle timestampColumn,
        @JsonProperty("fiberFunction") Function fiberFunction,
        @JsonProperty("storageFormat") StorageFormat storageFormat,
        @JsonProperty("predicates") Optional<TupleDomain<ColumnHandle>> predicates)
{
    this.table = requireNonNull(table, "table is null");
    this.fiberColumn = requireNonNull(fiberColumn, "fiberColumn is null");
    this.timestampColumn = requireNonNull(timestampColumn, "timestampColumn is null");
    this.fiberFunction = requireNonNull(fiberFunction, "fiberFunction is null");
    this.storageFormat = requireNonNull(storageFormat, "storageFormat is null");
    this.predicates = requireNonNull(predicates, "predicates is null");
}
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout)
{
    KuduTableLayoutHandle layoutHandle = checkType(layout, KuduTableLayoutHandle.class, "layout");
    KuduTableHandle tableHandle = layoutHandle.getTable();

    KuduClient kuduClient = kuduClientManager.getClient();
    List<KuduScanToken> tokens = kuduClientManager.newScanTokenBuilder(kuduClient, tableHandle.getSchemaTableName().getTableName()).build();

    TupleDomain<KuduColumnHandle> effectivePredicate = layoutHandle.getConstraint()
            .transform(handle -> checkType(handle, KuduColumnHandle.class, "columnHandle"));

    // every split can run on any worker, so compute the address list once instead of per token
    List<HostAddress> hostAddresses = nodeManager.getWorkerNodes().stream()
            .map(node -> node.getHostAndPort())
            .collect(Collectors.toList());

    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();
    for (int i = 0; i < tokens.size(); i++) {
        builder.add(new KuduSplit(hostAddresses, tableHandle.getSchemaTableName(), i, effectivePredicate));
    }

    kuduClientManager.close(kuduClient);
    return new FixedSplitSource(builder.build());
}
public KuduRecordCursor(KuduClientManager kuduClientManager, int kuduTokenId, List<KuduColumnHandle> columns, SchemaTableName tableName, TupleDomain<KuduColumnHandle> predicate)
{
    this.kuduClientManager = requireNonNull(kuduClientManager, "kuduClientManager is null");
    this.columns = requireNonNull(columns, "columns is null");

    fieldToColumnIndex = new int[columns.size()];
    for (int i = 0; i < columns.size(); i++) {
        KuduColumnHandle columnHandle = columns.get(i);
        fieldToColumnIndex[i] = columnHandle.getOrdinalPosition();
    }

    this.kuduClient = requireNonNull(kuduClientManager.getClient(), "kuduClient is null");

    List<KuduScanToken> tokens = kuduClientManager
            .newScanTokenBuilder(this.kuduClient, tableName.getTableName())
            .setProjectedColumnNames(columns.stream().map(column -> column.getColumnName()).collect(Collectors.toList()))
            .build();

    try {
        this.kuduScanner = tokens.get(kuduTokenId).intoScanner(this.kuduClient);
    }
    catch (Exception e) {
        // the scanner stays null if the token cannot be turned into a scanner
        logger.error(e, e.getMessage());
    }
}
public static PreparedStatement create(
        Connection connection,
        String sql,
        List<String> columnNames,
        List<Type> types,
        Set<Integer> uuidColumnIndexes,
        TupleDomain<Integer> tupleDomain)
        throws SQLException
{
    checkArgument(!isNullOrEmpty(sql), "sql is null or empty");

    List<ValueBuffer> bindValues = new ArrayList<>(256);
    sql = sql + getWhereClause(tupleDomain, columnNames, types, uuidColumnIndexes, bindValues);

    PreparedStatement statement = connection.prepareStatement(sql, TYPE_FORWARD_ONLY, CONCUR_READ_ONLY);
    enableStreamingResults(statement);

    // bind values to statement
    int bindIndex = 1;
    for (ValueBuffer value : bindValues) {
        bindField(value, statement, bindIndex, uuidColumnIndexes.contains(value.getColumnIndex()));
        bindIndex++;
    }

    return statement;
}
private Optional<Map<ColumnHandle, NullableValue>> parseValuesAndFilterPartition(String partitionName, List<HiveColumnHandle> partitionColumns, TupleDomain<ColumnHandle> predicate)
{
    checkArgument(predicate.getDomains().isPresent());

    List<String> partitionValues = extractPartitionKeyValues(partitionName);

    Map<ColumnHandle, Domain> domains = predicate.getDomains().get();
    ImmutableMap.Builder<ColumnHandle, NullableValue> builder = ImmutableMap.builder();
    for (int i = 0; i < partitionColumns.size(); i++) {
        HiveColumnHandle column = partitionColumns.get(i);
        NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i), column.getHiveType(), timeZone);

        Domain allowedDomain = domains.get(column);
        if (allowedDomain != null && !allowedDomain.includesNullableValue(parsedValue.getValue())) {
            return Optional.empty();
        }
        builder.put(column, parsedValue);
    }

    return Optional.of(builder.build());
}
@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(
        String clientId,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    RecordReader<?, ?> recordReader = HiveUtil.createRecordReader(configuration, path, start, length, schema, columns);

    return Optional.<HiveRecordCursor>of(new GenericHiveRecordCursor<>(
            genericRecordReader(recordReader),
            length,
            schema,
            partitionKeys,
            columns,
            hiveStorageTimeZone,
            typeManager));
}
private List<ShardInfo> compact(StoragePageSink storagePageSink, Set<UUID> uuids, List<Long> columnIds, List<Type> columnTypes)
        throws IOException
{
    for (UUID uuid : uuids) {
        try (ConnectorPageSource pageSource = storageManager.getPageSource(uuid, columnIds, columnTypes, TupleDomain.all(), readerAttributes)) {
            while (!pageSource.isFinished()) {
                Page page = pageSource.getNextPage();
                if (isNullOrEmptyPage(page)) {
                    continue;
                }
                storagePageSink.appendPages(ImmutableList.of(page));
                if (storagePageSink.isFull()) {
                    storagePageSink.flush();
                }
            }
        }
    }
    return storagePageSink.commit();
}
@Override
public boolean matches(long numberOfRows, Map<Integer, Statistics<?>> statisticsByColumnIndex)
{
    if (numberOfRows == 0) {
        return false;
    }

    ImmutableMap.Builder<C, Domain> domains = ImmutableMap.builder();
    for (ColumnReference<C> columnReference : columnReferences) {
        Statistics<?> statistics = statisticsByColumnIndex.get(columnReference.getOrdinal());
        Domain domain = getDomain(columnReference.getType(), numberOfRows, statistics);
        if (domain != null) {
            domains.put(columnReference.getColumn(), domain);
        }
    }
    TupleDomain<C> stripeDomain = TupleDomain.withColumnDomains(domains.build());

    return effectivePredicate.overlaps(stripeDomain);
}
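// Sketch (not from the original sources; String column keys are used purely for illustration):
// the overlaps() check above is what makes stripe pruning sound. If the domain derived from the
// stripe statistics is disjoint from the effective predicate for some column, no row in the
// stripe can match and the stripe is skipped; a column with missing statistics is simply left
// out of stripeDomain, which errs on the side of reading the stripe.
TupleDomain<String> predicate = TupleDomain.withColumnDomains(ImmutableMap.of(
        "x", Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 100L)), false)));
TupleDomain<String> stripe = TupleDomain.withColumnDomains(ImmutableMap.of(
        "x", Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 0L, true, 50L, true)), false)));
boolean mayMatch = predicate.overlaps(stripe); // false: stripe values in [0, 50] never exceed 100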
@Test
public void testNoSchemaFilter()
        throws Exception
{
    // Create "orders" table in a different schema
    metadata.createTable(SESSION, tableMetadataBuilder(new SchemaTableName("other", "orders"))
            .column("orderkey", BIGINT)
            .build());

    // Create another table that should not be selected
    metadata.createTable(SESSION, tableMetadataBuilder(new SchemaTableName("schema1", "foo"))
            .column("orderkey", BIGINT)
            .build());

    TupleDomain<Integer> tupleDomain = TupleDomain.withColumnDomains(
            ImmutableMap.<Integer, Domain>builder()
                    .put(1, Domain.singleValue(VARCHAR, utf8Slice("orders")))
                    .build());

    MetadataDao metadataDao = dummyHandle.attach(MetadataDao.class);
    Set<Long> actual = ImmutableSet.copyOf(ShardMetadataRecordCursor.getTableIds(dbi, tupleDomain));
    Set<Long> expected = ImmutableSet.of(
            metadataDao.getTableInformation("other", "orders").getTableId(),
            metadataDao.getTableInformation("test", "orders").getTableId());
    assertEquals(actual, expected);
}
@Test
public void testNoTableFilter()
        throws Exception
{
    // Create a second table in the same schema
    metadata.createTable(SESSION, tableMetadataBuilder(new SchemaTableName("test", "orders2"))
            .column("orderkey", BIGINT)
            .build());

    // Create another table that should not be selected
    metadata.createTable(SESSION, tableMetadataBuilder(new SchemaTableName("schema1", "foo"))
            .column("orderkey", BIGINT)
            .build());

    TupleDomain<Integer> tupleDomain = TupleDomain.withColumnDomains(
            ImmutableMap.<Integer, Domain>builder()
                    .put(0, Domain.singleValue(VARCHAR, utf8Slice("test")))
                    .build());

    MetadataDao metadataDao = dummyHandle.attach(MetadataDao.class);
    Set<Long> actual = ImmutableSet.copyOf(ShardMetadataRecordCursor.getTableIds(dbi, tupleDomain));
    Set<Long> expected = ImmutableSet.of(
            metadataDao.getTableInformation("test", "orders").getTableId(),
            metadataDao.getTableInformation("test", "orders2").getTableId());
    assertEquals(actual, expected);
}
@Test
public void testGetPartitionSplitsTableOfflinePartition()
        throws Exception
{
    ConnectorSession session = newSession();

    ConnectorTableHandle tableHandle = getTableHandle(tableOfflinePartition);
    assertNotNull(tableHandle);

    ColumnHandle dsColumn = metadata.getColumnHandles(session, tableHandle).get("ds");
    assertNotNull(dsColumn);

    Domain domain = Domain.singleValue(VARCHAR, utf8Slice("2012-12-30"));
    TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(dsColumn, domain));
    List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, tableHandle, new Constraint<>(tupleDomain, bindings -> true), Optional.empty());
    try {
        getSplitCount(splitManager.getSplits(session, getOnlyElement(tableLayoutResults).getTableLayout().getHandle()));
        fail("Expected PartitionOfflineException");
    }
    catch (PartitionOfflineException e) {
        assertEquals(e.getTableName(), tableOfflinePartition);
        assertEquals(e.getPartition(), "ds=2012-12-30");
    }
}
@Override
public boolean matches(Map<Integer, ParquetDictionaryDescriptor> dictionariesByColumnIndex)
{
    ImmutableMap.Builder<C, Domain> domains = ImmutableMap.builder();
    for (ColumnReference<C> columnReference : columnReferences) {
        ParquetDictionaryDescriptor dictionaryDescriptor = dictionariesByColumnIndex.get(columnReference.getOrdinal());
        Domain domain = getDomain(columnReference.getType(), dictionaryDescriptor);
        if (domain != null) {
            domains.put(columnReference.getColumn(), domain);
        }
    }
    TupleDomain<C> stripeDomain = TupleDomain.withColumnDomains(domains.build());

    return effectivePredicate.overlaps(stripeDomain);
}
@Override
public List<ConnectorTableLayoutResult> getTableLayouts(
        ConnectorSession session,
        ConnectorTableHandle handle,
        Constraint<ColumnHandle> constraint,
        Optional<Set<ColumnHandle>> desiredColumns)
{
    requireNonNull(handle, "handle is null");
    checkArgument(handle instanceof BlackHoleTableHandle, "handle is not an instance of BlackHoleTableHandle");
    BlackHoleTableHandle blackHoleHandle = (BlackHoleTableHandle) handle;

    BlackHoleTableLayoutHandle layoutHandle = new BlackHoleTableLayoutHandle(
            blackHoleHandle.getSplitCount(),
            blackHoleHandle.getPagesPerSplit(),
            blackHoleHandle.getRowsPerPage(),
            blackHoleHandle.getFieldsLength());
    return ImmutableList.of(new ConnectorTableLayoutResult(getTableLayout(session, layoutHandle), TupleDomain.all()));
}
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layout)
{
    JmxTableLayoutHandle jmxLayout = checkType(layout, JmxTableLayoutHandle.class, "layout");
    JmxTableHandle tableHandle = jmxLayout.getTable();
    TupleDomain<ColumnHandle> predicate = jmxLayout.getConstraint();

    //TODO is there a better way to get the node column?
    JmxColumnHandle nodeColumnHandle = tableHandle.getColumns().get(0);

    List<ConnectorSplit> splits = nodeManager.getNodes(ACTIVE)
            .stream()
            .filter(node -> {
                NullableValue value = NullableValue.of(VARCHAR, utf8Slice(node.getNodeIdentifier()));
                return predicate.overlaps(fromFixedValues(ImmutableMap.of(nodeColumnHandle, value)));
            })
            .map(node -> new JmxSplit(tableHandle, ImmutableList.of(node.getHostAndPort())))
            .collect(toList());

    return new FixedSplitSource(connectorId, splits);
}
@Test
public void testPredicatePushdown()
        throws Exception
{
    for (Node node : nodes) {
        String nodeIdentifier = node.getNodeIdentifier();
        TupleDomain<ColumnHandle> nodeTupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(columnHandle, NullableValue.of(VARCHAR, utf8Slice(nodeIdentifier))));
        ConnectorTableLayoutHandle layout = new JmxTableLayoutHandle(tableHandle, nodeTupleDomain);

        ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, layout);
        List<ConnectorSplit> allSplits = getAllSplits(splitSource);

        assertEquals(allSplits.size(), 1);
        assertEquals(allSplits.get(0).getAddresses().size(), 1);
        assertEquals(allSplits.get(0).getAddresses().get(0).getHostText(), nodeIdentifier);
    }
}
@Test
public void testRecordSetProvider()
        throws Exception
{
    for (SchemaTableName schemaTableName : metadata.listTables(SESSION, "jmx")) {
        JmxTableHandle tableHandle = metadata.getTableHandle(SESSION, schemaTableName);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(SESSION, tableHandle).values());

        ConnectorTableLayoutHandle layout = new JmxTableLayoutHandle(tableHandle, TupleDomain.all());
        ConnectorSplitSource splitSource = splitManager.getSplits(JmxTransactionHandle.INSTANCE, SESSION, layout);
        List<ConnectorSplit> allSplits = getAllSplits(splitSource);
        assertEquals(allSplits.size(), nodes.size());

        ConnectorSplit split = allSplits.get(0);
        RecordSet recordSet = recordSetProvider.getRecordSet(JmxTransactionHandle.INSTANCE, SESSION, split, columnHandles);
        try (RecordCursor cursor = recordSet.cursor()) {
            while (cursor.advanceNextPosition()) {
                for (int i = 0; i < recordSet.getColumnTypes().size(); i++) {
                    cursor.isNull(i);
                }
            }
        }
    }
}
@BeforeMethod
public void setUp()
        throws Exception
{
    scanAssignments = ImmutableMap.<Symbol, ColumnHandle>builder()
            .put(A, new TestingColumnHandle("a"))
            .put(B, new TestingColumnHandle("b"))
            .put(C, new TestingColumnHandle("c"))
            .put(D, new TestingColumnHandle("d"))
            .put(E, new TestingColumnHandle("e"))
            .put(F, new TestingColumnHandle("f"))
            .build();

    Map<Symbol, ColumnHandle> assignments = Maps.filterKeys(scanAssignments, Predicates.in(ImmutableList.of(A, B, C, D, E, F)));
    baseTableScan = new TableScanNode(
            newId(),
            DUAL_TABLE_HANDLE,
            ImmutableList.copyOf(assignments.keySet()),
            assignments,
            Optional.empty(),
            TupleDomain.all(),
            null);

    expressionNormalizer = new ExpressionIdentityNormalizer();
}
public static Optional<String> stringFilter(TupleDomain<Integer> constraint, int index)
{
    if (constraint.isNone()) {
        return Optional.empty();
    }

    Domain domain = constraint.getDomains().get().get(index);
    if ((domain == null) || !domain.isSingleValue()) {
        return Optional.empty();
    }

    Object value = domain.getSingleValue();
    if (value instanceof Slice) {
        return Optional.of(((Slice) value).toStringUtf8());
    }
    return Optional.empty();
}
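// Usage sketch (hypothetical constraint, not from the original sources): stringFilter yields a
// value only when the domain on the given ordinal pins the column to a single Slice value, so a
// caller can use it to prune metadata lookups by an exact schema or table name.
TupleDomain<Integer> constraint = TupleDomain.withColumnDomains(ImmutableMap.of(
        0, Domain.singleValue(VARCHAR, utf8Slice("test"))));
Optional<String> schemaName = stringFilter(constraint, 0); // Optional.of("test")
Optional<String> tableName = stringFilter(constraint, 1);  // Optional.empty(): no domain on ordinal 1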
@Test
public void testToPredicateAllIgnored()
        throws Exception
{
    TupleDomain<Symbol> tupleDomain = withColumnDomains(ImmutableMap.<Symbol, Domain>builder()
            .put(A, Domain.singleValue(BIGINT, 1L))
            .put(B, Domain.onlyNull(DOUBLE))
            .put(C, Domain.notNull(VARCHAR))
            .put(D, Domain.all(BOOLEAN))
            .build());

    ExtractionResult result = fromPredicate(toPredicate(tupleDomain));
    assertEquals(result.getRemainingExpression(), TRUE_LITERAL);
    assertEquals(result.getTupleDomain(), withColumnDomains(ImmutableMap.<Symbol, Domain>builder()
            .put(A, Domain.singleValue(BIGINT, 1L))
            .put(B, Domain.onlyNull(DOUBLE))
            .put(C, Domain.notNull(VARCHAR))
            .build()));
}
private static RecordSet toRecordSet(ConnectorTransactionHandle sourceTransaction, SystemTable table, ConnectorSession session, TupleDomain<Integer> constraint)
{
    return new RecordSet()
    {
        private final List<Type> types = table.getTableMetadata().getColumns().stream()
                .map(ColumnMetadata::getType)
                .collect(toImmutableList());

        @Override
        public List<Type> getColumnTypes()
        {
            return types;
        }

        @Override
        public RecordCursor cursor()
        {
            return table.cursor(sourceTransaction, session, constraint);
        }
    };
}
public MockRemoteTask createTableScanTask(TaskId taskId, Node newNode, List<Split> splits, PartitionedSplitCountTracker partitionedSplitCountTracker)
{
    Symbol symbol = new Symbol("column");
    PlanNodeId sourceId = new PlanNodeId("sourceId");
    PlanFragment testFragment = new PlanFragment(
            new PlanFragmentId("test"),
            new TableScanNode(
                    sourceId,
                    new TableHandle("test", new TestingTableHandle()),
                    ImmutableList.of(symbol),
                    ImmutableMap.of(symbol, new TestingColumnHandle("column")),
                    Optional.empty(),
                    TupleDomain.all(),
                    null),
            ImmutableMap.<Symbol, Type>of(symbol, VARCHAR),
            ImmutableList.of(symbol),
            SOURCE,
            sourceId,
            Optional.empty());

    ImmutableMultimap.Builder<PlanNodeId, Split> initialSplits = ImmutableMultimap.builder();
    for (Split sourceSplit : splits) {
        initialSplits.put(sourceId, sourceSplit);
    }
    return createRemoteTask(TEST_SESSION, taskId, newNode, 0, testFragment, initialSplits.build(), OutputBuffers.INITIAL_EMPTY_OUTPUT_BUFFERS, partitionedSplitCountTracker);
}
@JsonCreator
public TableScanNode(
        @JsonProperty("id") PlanNodeId id,
        @JsonProperty("table") TableHandle table,
        @JsonProperty("outputSymbols") List<Symbol> outputs,
        @JsonProperty("assignments") Map<Symbol, ColumnHandle> assignments,
        @JsonProperty("layout") Optional<TableLayoutHandle> tableLayout,
        @JsonProperty("currentConstraint") TupleDomain<ColumnHandle> currentConstraint,
        @JsonProperty("originalConstraint") @Nullable Expression originalConstraint)
{
    super(id);
    requireNonNull(table, "table is null");
    requireNonNull(outputs, "outputs is null");
    requireNonNull(assignments, "assignments is null");
    checkArgument(assignments.keySet().containsAll(outputs), "assignments does not cover all of outputs");
    requireNonNull(tableLayout, "tableLayout is null");
    requireNonNull(currentConstraint, "currentConstraint is null");

    this.table = table;
    this.outputSymbols = ImmutableList.copyOf(outputs);
    this.assignments = ImmutableMap.copyOf(assignments);
    this.originalConstraint = originalConstraint;
    this.tableLayout = tableLayout;
    this.currentConstraint = currentConstraint;
}
private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold)
{
    checkArgument(effectivePredicate.getDomains().isPresent());

    ImmutableMap.Builder<HiveColumnHandle, Domain> builder = ImmutableMap.builder();
    for (Map.Entry<ColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        HiveColumnHandle hiveColumnHandle = checkType(entry.getKey(), HiveColumnHandle.class, "ConnectorColumnHandle");

        ValueSet values = entry.getValue().getValues();
        ValueSet compactValueSet = values.getValuesProcessor().<Optional<ValueSet>>transform(
                ranges -> ranges.getRangeCount() > threshold ? Optional.of(ValueSet.ofRanges(ranges.getSpan())) : Optional.empty(),
                discreteValues -> discreteValues.getValues().size() > threshold ? Optional.of(ValueSet.all(values.getType())) : Optional.empty(),
                allOrNone -> Optional.empty())
                .orElse(values);
        builder.put(hiveColumnHandle, Domain.create(compactValueSet, entry.getValue().isNullAllowed()));
    }
    return TupleDomain.withColumnDomains(builder.build());
}
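// Illustration (hypothetical values, not from the original sources): with threshold = 2, a
// three-range domain is collapsed to its span. The compacted predicate is a superset of the
// original, so compaction can only admit extra rows downstream, never reject valid ones.
ValueSet values = ValueSet.ofRanges(
        Range.equal(BIGINT, 1L),
        Range.equal(BIGINT, 5L),
        Range.equal(BIGINT, 9L));
// getRangeCount() == 3 > threshold, so the compact form is the single span [1, 9]
ValueSet compact = ValueSet.ofRanges(values.getRanges().getSpan());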
public HiveFileIterator(
        Path path,
        FileSystem fileSystem,
        DirectoryLister directoryLister,
        NamenodeStats namenodeStats,
        String partitionName,
        InputFormat<?, ?> inputFormat,
        Properties schema,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate)
{
    this.partitionName = requireNonNull(partitionName, "partitionName is null");
    this.inputFormat = requireNonNull(inputFormat, "inputFormat is null");
    this.schema = requireNonNull(schema, "schema is null");
    this.partitionKeys = requireNonNull(partitionKeys, "partitionKeys is null");
    this.effectivePredicate = requireNonNull(effectivePredicate, "effectivePredicate is null");
    this.path = requireNonNull(path, "path is null");
    this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
    this.directoryLister = requireNonNull(directoryLister, "directoryLister is null");
    this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
}
@Override
public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession connectorSession, ConnectorTableHandle connectorTableHandle, Constraint<ColumnHandle> constraint, Optional<Set<ColumnHandle>> optional)
{
    RestTableHandle tableHandle = Types.checkType(connectorTableHandle, RestTableHandle.class, "tableHandle");
    return ImmutableList.of(
            new ConnectorTableLayoutResult(
                    getTableLayout(connectorSession, new RestConnectorTableLayoutHandle(tableHandle)),
                    TupleDomain.all()));
}
@JsonCreator
public KuduTableLayoutHandle(
        @JsonProperty("table") KuduTableHandle table,
        @JsonProperty("constraint") TupleDomain<ColumnHandle> constraint)
{
    this.table = requireNonNull(table, "table is null");
    this.constraint = requireNonNull(constraint, "constraint is null");
}
@JsonCreator
public KuduSplit(
        @JsonProperty("addresses") List<HostAddress> addresses,
        @JsonProperty("tableName") SchemaTableName tableName,
        @JsonProperty("kuduTokenId") int kuduTokenId,
        @JsonProperty("effectivePredicate") TupleDomain<KuduColumnHandle> effectivePredicate)
{
    this.addresses = requireNonNull(addresses, "addresses is null");
    this.tableName = requireNonNull(tableName, "tableName is null");
    this.kuduTokenId = kuduTokenId; // primitive int can never be null, so no requireNonNull
    this.effectivePredicate = requireNonNull(effectivePredicate, "effectivePredicate is null");
}
public ShardMetadataRecordCursor(IDBI dbi, TupleDomain<Integer> tupleDomain)
{
    requireNonNull(dbi, "dbi is null");
    this.dbi = dbi;
    this.metadataDao = onDemandDao(dbi, MetadataDao.class);
    this.tupleDomain = requireNonNull(tupleDomain, "tupleDomain is null");
    this.tableIds = getTableIds(dbi, tupleDomain);
    this.columnNames = createQualifiedColumnNames();
    this.resultSetValues = new ResultSetValues(TYPES);
    this.resultSet = getNextResultSet();
}
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layout)
{
    RaptorTableLayoutHandle handle = checkType(layout, RaptorTableLayoutHandle.class, "layout");
    RaptorTableHandle table = handle.getTable();
    TupleDomain<RaptorColumnHandle> effectivePredicate = toRaptorTupleDomain(handle.getConstraint());
    return new RaptorSplitSource(table.getTableId(), effectivePredicate, table.getTransactionId());
}
public RaptorSplitSource(long tableId, TupleDomain<RaptorColumnHandle> effectivePredicate, OptionalLong transactionId)
{
    this.tableId = tableId;
    this.effectivePredicate = requireNonNull(effectivePredicate, "effectivePredicate is null");
    this.transactionId = requireNonNull(transactionId, "transactionId is null");
    this.iterator = new SynchronizedResultIterator<>(shardManager.getShardNodes(tableId, effectivePredicate));
}
@JsonCreator
public RaptorSplit(
        @JsonProperty("connectorId") String connectorId,
        @JsonProperty("shardUuid") UUID shardUuid,
        @JsonProperty("effectivePredicate") TupleDomain<RaptorColumnHandle> effectivePredicate,
        @JsonProperty("transactionId") OptionalLong transactionId)
{
    this(connectorId, shardUuid, ImmutableList.of(), effectivePredicate, transactionId);
}
private MaterializedResult readTable(
        ConnectorTableHandle tableHandle,
        List<ColumnHandle> columnHandles,
        ConnectorSession session,
        TupleDomain<ColumnHandle> tupleDomain,
        OptionalInt expectedSplitCount,
        Optional<HiveStorageFormat> expectedStorageFormat)
        throws Exception
{
    List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, tableHandle, new Constraint<>(tupleDomain, bindings -> true), Optional.empty());
    ConnectorTableLayoutHandle layoutHandle = getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
    List<ConnectorSplit> splits = getAllSplits(splitManager.getSplits(session, layoutHandle));
    if (expectedSplitCount.isPresent()) {
        assertEquals(splits.size(), expectedSplitCount.getAsInt());
    }

    ImmutableList.Builder<MaterializedRow> allRows = ImmutableList.builder();
    for (ConnectorSplit split : splits) {
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(session, split, columnHandles)) {
            if (expectedStorageFormat.isPresent()) {
                assertPageSourceType(pageSource, expectedStorageFormat.get());
            }
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            allRows.addAll(result.getMaterializedRows());
        }
    }
    return new MaterializedResult(allRows.build(), getTypes(columnHandles));
}
private static long benchmarkReadBigint(
        FileSplit fileSplit,
        Properties partitionProperties,
        HiveRecordCursorProvider hiveRecordCursorProvider)
        throws Exception
{
    HiveSplit split = createHiveSplit(fileSplit, partitionProperties);

    long sum = 0;
    for (int i = 0; i < LOOPS; i++) {
        sum = 0;
        HiveRecordCursor recordCursor = hiveRecordCursorProvider.createHiveRecordCursor(
                split.getClientId(),
                new Configuration(),
                SESSION,
                new Path(split.getPath()),
                split.getStart(),
                split.getLength(),
                split.getSchema(),
                BIGINT_COLUMN,
                split.getPartitionKeys(),
                TupleDomain.<HiveColumnHandle>all(),
                DateTimeZone.UTC,
                TYPE_MANAGER).get();

        while (recordCursor.advanceNextPosition()) {
            if (!recordCursor.isNull(0)) {
                sum += recordCursor.getLong(0);
            }
        }
        recordCursor.close();
    }
    return sum;
}
private static HiveSplit createHiveSplit(FileSplit fileSplit, Properties partitionProperties)
{
    return new HiveSplit("test",
            "test",
            "lineitem",
            "unpartitioned",
            fileSplit.getPath().toString(),
            fileSplit.getStart(),
            fileSplit.getLength(),
            partitionProperties,
            ImmutableList.<HivePartitionKey>of(),
            ImmutableList.<HostAddress>of(),
            false,
            TupleDomain.<HiveColumnHandle>all());
}
@Test
public void testGetPartitionSplitsBatch()
        throws Exception
{
    ConnectorSession session = newSession();

    ConnectorTableHandle tableHandle = getTableHandle(tablePartitionFormat);
    List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, tableHandle, new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
    ConnectorSplitSource splitSource = splitManager.getSplits(session, getOnlyElement(tableLayoutResults).getTableLayout().getHandle());

    assertEquals(getSplitCount(splitSource), partitionCount);
}
private static OrcPredicate getPredicate(TupleDomain<RaptorColumnHandle> effectivePredicate, Map<Long, Integer> indexMap)
{
    ImmutableList.Builder<ColumnReference<RaptorColumnHandle>> columns = ImmutableList.builder();
    for (RaptorColumnHandle column : effectivePredicate.getDomains().get().keySet()) {
        Integer index = indexMap.get(column.getColumnId());
        if (index != null) {
            columns.add(new ColumnReference<>(column, index, column.getColumnType()));
        }
    }
    return new TupleDomainOrcPredicate<>(effectivePredicate, columns.build());
}
@Test
public void testAssignShard()
{
    long tableId = createTable("test");
    UUID shard = UUID.randomUUID();
    List<ShardInfo> shardNodes = ImmutableList.of(shardInfo(shard, "node1"));
    List<ColumnInfo> columns = ImmutableList.of(new ColumnInfo(1, BIGINT));

    shardManager.createTable(tableId, columns);

    long transactionId = shardManager.beginTransaction();
    shardManager.commitShards(transactionId, tableId, columns, shardNodes, Optional.empty());

    ShardNodes actual = getOnlyElement(getShardNodes(tableId, TupleDomain.all()));
    assertEquals(actual, new ShardNodes(shard, ImmutableSet.of("node1")));

    shardManager.assignShard(tableId, shard, "node2"); // assign shard to another node

    actual = getOnlyElement(getShardNodes(tableId, TupleDomain.all()));
    assertEquals(actual, new ShardNodes(shard, ImmutableSet.of("node1", "node2")));

    // assigning a shard should be idempotent
    shardManager.assignShard(tableId, shard, "node2");

    // remove assignment from first node
    shardManager.unassignShard(tableId, shard, "node1");

    actual = getOnlyElement(getShardNodes(tableId, TupleDomain.all()));
    assertEquals(actual, new ShardNodes(shard, ImmutableSet.of("node2")));

    // removing an assignment should be idempotent
    shardManager.unassignShard(tableId, shard, "node1");
}
private static double benchmarkReadDouble(
        FileSplit fileSplit,
        Properties partitionProperties,
        HiveRecordCursorProvider hiveRecordCursorProvider)
        throws Exception
{
    HiveSplit split = createHiveSplit(fileSplit, partitionProperties);

    double sum = 0;
    for (int i = 0; i < LOOPS; i++) {
        sum = 0;
        HiveRecordCursor recordCursor = hiveRecordCursorProvider.createHiveRecordCursor(
                split.getClientId(),
                new Configuration(),
                SESSION,
                new Path(split.getPath()),
                split.getStart(),
                split.getLength(),
                split.getSchema(),
                DOUBLE_COLUMN,
                split.getPartitionKeys(),
                TupleDomain.<HiveColumnHandle>all(),
                DateTimeZone.UTC,
                TYPE_MANAGER).get();

        while (recordCursor.advanceNextPosition()) {
            if (!recordCursor.isNull(0)) {
                sum += recordCursor.getDouble(0);
            }
        }
        recordCursor.close();
    }
    return sum;
}
protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat)
        throws Exception
{
    ConnectorSession session = newSession();

    // begin creating the table
    ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, CREATE_TABLE_COLUMNS, createTableProperties(storageFormat), session.getUser());
    ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata);

    // write the data
    ConnectorPageSink sink = pageSinkProvider.createPageSink(session, outputHandle);
    sink.appendPage(CREATE_TABLE_DATA.toPage(), null);
    Collection<Slice> fragments = sink.finish();

    // verify all new files start with the unique prefix
    for (String filePath : listAllDataFiles(outputHandle)) {
        assertTrue(new Path(filePath).getName().startsWith(getFilePrefix(outputHandle)));
    }

    // commit the table
    metadata.commitCreateTable(session, outputHandle, fragments);

    // load the new table
    ConnectorTableHandle tableHandle = getTableHandle(tableName);
    List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());

    // verify the metadata
    tableMetadata = metadata.getTableMetadata(session, getTableHandle(tableName));
    assertEquals(tableMetadata.getOwner(), session.getUser());
    assertEquals(tableMetadata.getColumns(), CREATE_TABLE_COLUMNS);

    // verify the data
    MaterializedResult result = readTable(tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
    assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_DATA.getMaterializedRows());
}
@Test
public void testGetPartitionNames()
        throws Exception
{
    ConnectorTableHandle tableHandle = getTableHandle(tablePartitionFormat);
    List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(newSession(), tableHandle, new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
    assertExpectedTableLayout(getOnlyElement(tableLayoutResults).getTableLayout(), tableLayout);
}