Posted to commits@iceberg.apache.org by bl...@apache.org on 2019/11/25 23:20:49 UTC
[incubator-iceberg] branch vectorized-read updated: Merge master branch. Fix projection and filtering test failures (#670)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch vectorized-read
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/vectorized-read by this push:
new 8e3ef6b Merge master branch. Fix projection and filtering test failures (#670)
8e3ef6b is described below
commit 8e3ef6b744021dec149ae84bc1ef25880475b428
Author: Samarth Jain <sa...@apache.org>
AuthorDate: Mon Nov 25 15:20:42 2019 -0800
Merge master branch. Fix projection and filtering test failures (#670)
---
.gitignore | 23 +
.travis.yml | 35 +-
DISCLAIMER | 10 +
LICENSE | 10 +
NOTICE | 2 +-
README.md | 7 +-
api/src/main/java/org/apache/iceberg/Files.java | 5 +-
.../java/org/apache/iceberg/OverwriteFiles.java | 43 +-
.../java/org/apache/iceberg/PartitionField.java | 3 +-
.../java/org/apache/iceberg/PartitionSpec.java | 128 +-
.../java/org/apache/iceberg/RewriteManifests.java | 24 +-
api/src/main/java/org/apache/iceberg/Schema.java | 47 +-
api/src/main/java/org/apache/iceberg/Table.java | 7 +
.../main/java/org/apache/iceberg/TableScan.java | 10 +
.../main/java/org/apache/iceberg/UpdateSchema.java | 129 +
.../java/org/apache/iceberg/catalog/Catalog.java | 172 +-
.../java/org/apache/iceberg/catalog/Namespace.java | 20 +
.../apache/iceberg/catalog/TableIdentifier.java | 21 +
.../iceberg/exceptions/NotFoundException.java | 33 +
.../iceberg/expressions/BoundLiteralPredicate.java | 97 +
.../apache/iceberg/expressions/BoundPredicate.java | 39 +-
.../iceberg/expressions/BoundSetPredicate.java | 78 +
.../iceberg/expressions/BoundUnaryPredicate.java | 60 +
.../org/apache/iceberg/expressions/Evaluator.java | 24 +-
.../org/apache/iceberg/expressions/Expression.java | 3 +-
.../iceberg/expressions/ExpressionVisitors.java | 129 +-
.../apache/iceberg/expressions/Expressions.java | 31 +-
.../expressions/InclusiveMetricsEvaluator.java | 77 +-
.../org/apache/iceberg/expressions/Literal.java | 10 +
.../org/apache/iceberg/expressions/Literals.java | 102 +
.../iceberg/expressions/ManifestEvaluator.java | 42 +-
.../org/apache/iceberg/expressions/Predicate.java | 38 +-
.../iceberg/expressions/ResidualEvaluator.java | 5 +
.../expressions/StrictMetricsEvaluator.java | 50 +-
.../iceberg/expressions/UnboundPredicate.java | 197 +-
.../main/java/org/apache/iceberg/io/InputFile.java | 2 +
.../java/org/apache/iceberg/transforms/Bucket.java | 53 +-
.../java/org/apache/iceberg/transforms/Dates.java | 23 +-
.../org/apache/iceberg/transforms/Identity.java | 12 +-
.../apache/iceberg/transforms/ProjectionUtil.java | 132 +-
.../org/apache/iceberg/transforms/Timestamps.java | 26 +-
.../org/apache/iceberg/transforms/Transform.java | 2 +-
.../org/apache/iceberg/transforms/Transforms.java | 18 +-
.../org/apache/iceberg/transforms/Truncate.java | 161 +-
.../iceberg/transforms/UnknownTransform.java | 88 +
.../apache/iceberg/types/CheckCompatibility.java | 27 +-
.../java/org/apache/iceberg/types/Conversions.java | 21 +-
.../java/org/apache/iceberg/types/IndexByName.java | 68 +-
.../java/org/apache/iceberg/types/ReassignIds.java | 3 +
.../java/org/apache/iceberg/types/TypeUtil.java | 21 +-
.../main/java/org/apache/iceberg/types/Types.java | 22 +-
.../java/org/apache/iceberg/util/BinaryUtil.java | 6 +-
.../org/apache/iceberg/util/CharSequenceSet.java | 141 +
.../apache/iceberg/util/CharSequenceWrapper.java | 89 +
.../java/org/apache/iceberg/util/UnicodeUtil.java | 16 +-
.../java/org/apache/iceberg/AssertHelpers.java | 0
.../test/java/org/apache/iceberg/TestHelpers.java | 68 +-
.../iceberg/TestPartitionSpecValidation.java | 108 +-
.../apache/iceberg/TestTransformSerialization.java | 83 +
.../apache/iceberg/expressions/TestEvaluator.java | 209 +-
.../iceberg/expressions/TestExpressionBinding.java | 13 +
.../iceberg/expressions/TestExpressionHelpers.java | 4 +-
.../expressions/TestExpressionSerialization.java | 51 +-
.../TestInclusiveManifestEvaluator.java | 35 +-
.../expressions/TestInclusiveMetricsEvaluator.java | 95 +-
.../iceberg/expressions/TestPredicateBinding.java | 247 +-
.../expressions/TestStrictMetricsEvaluator.java | 57 +-
.../org/apache/iceberg/transforms/TestDates.java | 18 +
.../iceberg/transforms/TestDatesProjection.java | 48 +-
.../apache/iceberg/transforms/TestProjection.java | 18 +-
.../apache/iceberg/transforms/TestStartsWith.java | 96 +
.../apache/iceberg/transforms/TestTimestamps.java | 22 +
.../transforms/TestTimestampsProjection.java | 72 +-
.../transforms/TestTransformSerialization.java | 85 -
.../transforms/TestTruncatesProjection.java | 315 ++
.../iceberg/transforms/TestTruncatesResiduals.java | 183 +
.../org/apache/iceberg/types/TestConversions.java | 42 +
.../iceberg/types/TestReadabilityChecks.java | 22 +
.../iceberg/types/TestSerializableTypes.java | 6 +-
.../org/apache/iceberg/types/TestTypeUtil.java | 56 +
baseline.gradle | 78 +
build.gradle | 240 +-
.../org/apache/iceberg/BaseCombinedScanTask.java | 9 +
.../java/org/apache/iceberg/BaseMetadataTable.java | 9 +-
.../org/apache/iceberg/BaseMetastoreCatalog.java | 285 +-
.../iceberg/BaseMetastoreTableOperations.java | 151 +-
.../org/apache/iceberg/BaseOverwriteFiles.java | 150 +
.../org/apache/iceberg/BaseReplacePartitions.java | 69 +
.../java/org/apache/iceberg/BaseRewriteFiles.java | 60 +
.../org/apache/iceberg/BaseRewriteManifests.java | 344 ++
.../main/java/org/apache/iceberg/BaseSnapshot.java | 66 +-
.../main/java/org/apache/iceberg/BaseTable.java | 13 +-
.../java/org/apache/iceberg/BaseTableScan.java | 61 +-
.../java/org/apache/iceberg/BaseTransaction.java | 296 +-
.../java/org/apache/iceberg/CachingCatalog.java | 107 +
.../apache/iceberg/CommitCallbackTransaction.java | 100 +
.../main/java/org/apache/iceberg/DataFiles.java | 20 +-
.../java/org/apache/iceberg/DataFilesTable.java | 15 +-
.../java/org/apache/iceberg/DataTableScan.java | 25 +-
.../main/java/org/apache/iceberg/FastAppend.java | 2 +-
.../main/java/org/apache/iceberg/FileHistory.java | 4 +-
.../main/java/org/apache/iceberg/FindFiles.java | 212 +
.../java/org/apache/iceberg/GenericDataFile.java | 51 +-
.../org/apache/iceberg/GenericManifestFile.java | 3 +-
.../iceberg/GenericPartitionFieldSummary.java | 5 +-
.../main/java/org/apache/iceberg/HistoryTable.java | 4 +-
.../org/apache/iceberg/ManifestEntriesTable.java | 15 +-
.../java/org/apache/iceberg/ManifestGroup.java | 64 +-
.../java/org/apache/iceberg/ManifestReader.java | 60 +-
.../java/org/apache/iceberg/ManifestWriter.java | 29 +-
.../java/org/apache/iceberg/ManifestsTable.java | 4 +-
.../apache/iceberg/MergingSnapshotProducer.java | 128 +-
.../java/org/apache/iceberg/MetadataTableType.java | 39 +
.../java/org/apache/iceberg/MetricsConfig.java | 3 +-
.../main/java/org/apache/iceberg/MetricsModes.java | 40 +-
.../java/org/apache/iceberg/OverwriteData.java | 93 -
.../java/org/apache/iceberg/PartitionData.java | 37 +-
.../java/org/apache/iceberg/PartitionsTable.java | 137 +
.../java/org/apache/iceberg/RemoveSnapshots.java | 181 +-
.../main/java/org/apache/iceberg/ReplaceFiles.java | 60 -
.../java/org/apache/iceberg/ReplaceManifests.java | 258 -
.../apache/iceberg/ReplacePartitionsOperation.java | 69 -
.../main/java/org/apache/iceberg/ScanSummary.java | 15 +-
.../main/java/org/apache/iceberg/SchemaUpdate.java | 161 +-
.../java/org/apache/iceberg/SnapshotParser.java | 15 +-
.../java/org/apache/iceberg/SnapshotProducer.java | 18 +-
.../java/org/apache/iceberg/SnapshotSummary.java | 10 +
.../java/org/apache/iceberg/SnapshotsTable.java | 4 +-
.../java/org/apache/iceberg/StaticTableScan.java | 10 +-
.../java/org/apache/iceberg/StreamingDelete.java | 6 +
.../java/org/apache/iceberg/TableMetadata.java | 199 +-
.../org/apache/iceberg/TableMetadataParser.java | 60 +-
.../java/org/apache/iceberg/TableOperations.java | 15 +
.../java/org/apache/iceberg/TableProperties.java | 13 +
.../main/java/org/apache/iceberg/Transactions.java | 11 +-
.../main/java/org/apache/iceberg/avro/Avro.java | 32 +-
.../org/apache/iceberg/avro/AvroSchemaUtil.java | 98 +-
.../org/apache/iceberg/avro/AvroSchemaVisitor.java | 25 +-
.../iceberg/avro/AvroSchemaWithTypeVisitor.java | 135 +
.../apache/iceberg/avro/BuildAvroProjection.java | 11 +-
.../org/apache/iceberg/avro/GenericAvroReader.java | 2 +
.../main/java/org/apache/iceberg/avro/HasIds.java | 73 +
.../apache/iceberg/avro/ProjectionDatumReader.java | 12 +-
.../java/org/apache/iceberg/avro/PruneColumns.java | 130 +-
.../java/org/apache/iceberg/avro/ValueReaders.java | 4 +
.../org/apache/iceberg/hadoop/HadoopCatalog.java | 202 +
.../org/apache/iceberg/hadoop/HadoopInputFile.java | 4 +
.../iceberg/hadoop/HadoopTableOperations.java | 48 +-
.../org/apache/iceberg/hadoop/HadoopTables.java | 54 +-
.../iceberg/hadoop/SerializableConfiguration.java | 4 +-
.../org/apache/iceberg/mapping/MappedField.java | 12 +-
.../org/apache/iceberg/mapping/MappedFields.java | 8 +-
.../org/apache/iceberg/mapping/NameMapping.java | 13 +
.../java/org/apache/iceberg/util/BinPacking.java | 5 +
.../java/org/apache/iceberg/util/ByteBuffers.java | 13 +-
.../apache/iceberg/util/CharSequenceWrapper.java | 69 -
.../org/apache/iceberg/util/ManifestFileUtil.java | 124 +
.../main/java/org/apache/iceberg/util/Pair.java | 3 +-
.../org/apache/iceberg/util/StructLikeWrapper.java | 8 +-
.../main/java/org/apache/iceberg/util/Tasks.java | 14 +-
.../apache/iceberg/TableMetadataParserTest.java | 2 +-
.../java/org/apache/iceberg/TableTestBase.java | 70 +-
.../org/apache/iceberg/TestCreateTransaction.java | 22 +-
.../java/org/apache/iceberg/TestFastAppend.java | 4 +
.../java/org/apache/iceberg/TestFindFiles.java | 174 +
.../org/apache/iceberg/TestManifestCleanup.java | 113 +
.../java/org/apache/iceberg/TestMergeAppend.java | 66 +
.../test/java/org/apache/iceberg/TestMetrics.java | 176 +-
.../java/org/apache/iceberg/TestOverwrite.java | 6 +-
.../iceberg/TestOverwriteWithValidation.java | 605 ++
.../org/apache/iceberg/TestPartitionSpecInfo.java | 96 +
.../java/org/apache/iceberg/TestReplaceFiles.java | 286 -
.../org/apache/iceberg/TestReplaceManifests.java | 332 --
.../org/apache/iceberg/TestReplacePartitions.java | 6 +
.../org/apache/iceberg/TestReplaceTransaction.java | 8 +-
.../java/org/apache/iceberg/TestRewriteFiles.java | 286 +
.../org/apache/iceberg/TestRewriteManifests.java | 746 +++
.../apache/iceberg/TestScanDataFileColumns.java | 7 +-
.../java/org/apache/iceberg/TestScanSummary.java | 49 +-
.../java/org/apache/iceberg/TestSchemaUpdate.java | 66 +-
.../java/org/apache/iceberg/TestSnapshotJson.java | 14 +-
.../org/apache/iceberg/TestSnapshotSelection.java | 36 +
.../java/org/apache/iceberg/TestSplitPlanning.java | 40 +
.../java/org/apache/iceberg/TestTableMetadata.java | 483 ++
.../org/apache/iceberg/TestTableMetadataJson.java | 275 -
.../test/java/org/apache/iceberg/TestTables.java | 6 +-
.../apache/iceberg/TestTimestampPartitions.java | 67 +
.../java/org/apache/iceberg/avro/AvroDataTest.java | 7 +-
.../java/org/apache/iceberg/avro/RemoveIds.java | 87 +
.../org/apache/iceberg/avro/TestAvroEnums.java | 87 +
.../apache/iceberg/avro/TestAvroNameMapping.java | 323 ++
.../java/org/apache/iceberg/avro/TestHasIds.java | 54 +
.../apache/iceberg/hadoop/HadoopTableTestBase.java | 28 +-
.../apache/iceberg/hadoop/TestHadoopCatalog.java | 113 +
.../apache/iceberg/hadoop/TestHadoopCommits.java | 26 +
.../apache/iceberg/mapping/TestNameMapping.java | 10 +-
.../org/apache/iceberg/data/GenericRecord.java | 6 +-
.../org/apache/iceberg/data/TableScanIterable.java | 6 +-
.../org/apache/iceberg/data/avro/DataReader.java | 42 +-
.../apache/iceberg/data/avro/IcebergDecoder.java | 4 +-
.../data/parquet/GenericParquetReaders.java | 14 +
.../java/org/apache/iceberg/data/DataTest.java | 10 +-
.../org/apache/iceberg/data/TestGenericRecord.java | 62 +
.../apache/iceberg/data/TestReadProjection.java | 26 +
.../iceberg/data/parquet/TestGenericData.java | 2 +-
.../data/parquet/TestGenericReadProjection.java | 2 +-
deploy.gradle | 119 +
dev/.rat-excludes | 14 +-
dev/source-release.sh | 89 +
dev/stage-binaries.sh | 22 +
examples/Convert table to Iceberg.ipynb | 4 +-
gradle.properties | 20 +-
gradle/wrapper/gradle-wrapper.jar | Bin 54329 -> 0 bytes
gradlew | 4 +
.../java/org/apache/iceberg/hive/ClientPool.java | 4 +-
.../java/org/apache/iceberg/hive/HiveCatalog.java | 124 +-
.../java/org/apache/iceberg/hive/HiveCatalogs.java | 5 +-
.../org/apache/iceberg/hive/HiveClientPool.java | 12 +-
.../apache/iceberg/hive/HiveTableOperations.java | 61 +-
.../apache/iceberg/hive/RuntimeMetaException.java | 4 +
.../iceberg/hive/HiveCreateReplaceTableTest.java | 306 ++
.../org/apache/iceberg/hive/HiveMetastoreTest.java | 61 +
.../org/apache/iceberg/hive/HiveTableBaseTest.java | 40 +-
.../org/apache/iceberg/hive/HiveTableTest.java | 127 +-
.../java/org/apache/iceberg/hive/ScriptRunner.java | 4 +-
.../org/apache/iceberg/hive/TestHiveMetastore.java | 1 +
.../iceberg/hive/TestHiveTableConcurrency.java | 109 +
.../src/test/resources/hive-schema-3.1.0.derby.sql | 21 +
jitpack.yml | 17 +-
jmh.gradle | 38 +
.../org/apache/iceberg/parquet/ColumnIterator.java | 4 +-
.../org/apache/iceberg/parquet/ColumnWriter.java | 12 +-
.../apache/iceberg/parquet/MessageTypeToType.java | 9 +-
.../org/apache/iceberg/parquet/PageIterator.java | 71 +-
.../java/org/apache/iceberg/parquet/Parquet.java | 77 +-
.../org/apache/iceberg/parquet/ParquetAvro.java | 11 +-
.../iceberg/parquet/ParquetAvroValueReaders.java | 25 +-
.../apache/iceberg/parquet/ParquetAvroWriter.java | 14 +-
.../parquet/ParquetDictionaryRowGroupFilter.java | 65 +-
.../org/apache/iceberg/parquet/ParquetFilters.java | 76 +-
.../java/org/apache/iceberg/parquet/ParquetIO.java | 9 +-
.../apache/iceberg/parquet/ParquetIterable.java | 8 +-
.../parquet/ParquetMetricsRowGroupFilter.java | 74 +-
.../apache/iceberg/parquet/ParquetReadSupport.java | 10 +-
.../org/apache/iceberg/parquet/ParquetReader.java | 3 +-
.../apache/iceberg/parquet/ParquetSchemaUtil.java | 4 +
.../apache/iceberg/parquet/ParquetTypeVisitor.java | 27 +-
.../org/apache/iceberg/parquet/ParquetUtil.java | 211 +-
.../iceberg/parquet/ParquetValueReaders.java | 108 +-
.../iceberg/parquet/ParquetValueWriters.java | 5 +-
.../org/apache/iceberg/parquet/ParquetWriter.java | 18 +-
.../org/apache/iceberg/parquet/PruneColumns.java | 2 +-
.../apache/iceberg/parquet/TypeToMessageType.java | 12 +-
.../iceberg/parquet/TypeWithSchemaVisitor.java | 25 +-
.../iceberg/parquet/VectorizedParquetReader.java | 63 +-
.../parquet/arrow/NullValuesColumnVector.java | 130 +
.../parquet/vectorized/ColumnarBatchReaders.java | 32 +-
.../parquet/vectorized/NullabilityHolder.java | 2 +-
.../iceberg/parquet/vectorized/VectorHolder.java | 4 +-
.../parquet/vectorized/VectorizedArrowReader.java | 34 +-
.../vectorized/VectorizedColumnIterator.java | 2 +-
.../parquet/vectorized/VectorizedPageIterator.java | 81 +-
.../vectorized/VectorizedParquetValuesReader.java | 321 +-
.../parquet/vectorized/VectorizedReader.java | 4 +-
.../test/java/org/apache/iceberg/TestHelpers.java | 4 +
.../iceberg/parquet/ParquetWritingTestUtils.java | 5 +
.../parquet/TestDictionaryRowGroupFilter.java | 247 +-
.../iceberg/parquet/TestMetricsRowGroupFilter.java | 262 +-
.../parquet/TestMetricsRowGroupFilterTypes.java | 12 +-
.../apache/iceberg/parquet/TestParquetMetrics.java | 17 +
.../apache/iceberg/pig/IcebergPigInputFormat.java | 62 +-
.../org/apache/iceberg/pig/IcebergStorage.java | 159 +-
.../org/apache/iceberg/pig/PigParquetReader.java | 128 +-
.../java/org/apache/iceberg/pig/SchemaUtil.java | 39 +-
.../org/apache/iceberg/pig/SchemaUtilTest.java | 59 +-
python/CHANGELOG.md | 17 +
python/README.md | 50 +-
python/iceberg/__init__.py | 1 -
python/iceberg/api/__init__.py | 7 +-
python/iceberg/api/data_file.py | 2 +-
python/iceberg/api/data_operations.py | 23 +
python/iceberg/api/exceptions/__init__.py | 26 -
python/iceberg/api/expressions/evaluator.py | 17 +-
python/iceberg/api/expressions/expressions.py | 88 +
.../expressions/inclusive_manifest_evaluator.py | 16 +-
.../api/expressions/inclusive_metrics_evaluator.py | 14 +-
python/iceberg/api/expressions/literals.py | 10 +-
.../iceberg/api/expressions/residual_evaluator.py | 18 +-
.../api/expressions/strict_metrics_evaluator.py | 20 +-
python/iceberg/api/file_format.py | 16 +-
python/iceberg/api/file_scan_task.py | 5 +
python/iceberg/api/files.py | 2 +-
python/iceberg/api/manifest_file.py | 21 +
python/iceberg/api/partition_spec.py | 15 +-
python/iceberg/api/schema.py | 9 +-
python/iceberg/api/snapshot.py | 12 +
python/iceberg/api/table_scan.py | 9 +
python/iceberg/api/tables.py | 11 +-
python/iceberg/api/types/conversions.py | 14 +-
python/iceberg/api/types/type.py | 7 +-
python/iceberg/api/types/type_util.py | 62 +-
python/iceberg/api/types/types.py | 36 +-
python/iceberg/core/__init__.py | 4 +-
python/iceberg/core/avro/avro_schema_util.py | 13 +-
python/iceberg/core/avro/avro_to_iceberg.py | 69 +-
python/iceberg/core/base_combined_scan_task.py | 37 +
python/iceberg/core/base_file_scan_task.py | 126 +
.../core/base_metastore_table_operations.py | 44 +-
python/iceberg/core/base_snapshot.py | 24 +-
python/iceberg/core/base_table.py | 3 +-
python/iceberg/core/base_table_scan.py | 187 +-
python/iceberg/core/data_files.py | 9 +-
python/iceberg/core/data_table_scan.py | 98 +
python/iceberg/core/filesystem/__init__.py | 26 +
.../core/{hadoop => filesystem}/file_status.py | 0
python/iceberg/core/filesystem/file_system.py | 120 +
.../core/filesystem/filesystem_table_operations.py | 140 +
.../iceberg/core/filesystem/filesystem_tables.py | 53 +
python/iceberg/core/filesystem/local_filesystem.py | 81 +
python/iceberg/core/filesystem/s3_filesystem.py | 256 +
python/iceberg/core/filesystem/util.py | 39 +
python/iceberg/core/filtered_manifest.py | 118 +
python/iceberg/core/generic_data_file.py | 117 +-
python/iceberg/core/generic_manifest_file.py | 41 +-
.../core/generic_partition_field_summary.py | 26 +-
python/iceberg/core/hadoop/__init__.py | 22 -
python/iceberg/core/hadoop/file_system.py | 25 -
python/iceberg/core/hadoop/hadoop_input_file.py | 61 -
python/iceberg/core/hadoop/hadoop_output_file.py | 49 -
.../iceberg/core/hadoop/hadoop_table_operations.py | 49 -
python/iceberg/core/hadoop/local_filesystem.py | 84 -
.../iceberg/core/hadoop/s3_filesystem_wrapper.py | 54 -
python/iceberg/core/hadoop/util.py | 53 -
python/iceberg/core/manifest_entry.py | 10 +-
python/iceberg/core/manifest_reader.py | 132 +-
python/iceberg/core/partition_data.py | 11 +-
python/iceberg/core/partition_summary.py | 39 +-
python/iceberg/core/scan_summary.py | 409 ++
python/iceberg/core/schema_parser.py | 18 +-
python/iceberg/core/snapshot_parser.py | 9 +-
python/iceberg/core/table_metadata.py | 27 +-
python/iceberg/core/table_metadata_parser.py | 10 +-
python/iceberg/core/table_properties.py | 76 +
python/iceberg/core/util/__init__.py | 18 +-
python/iceberg/core/util/bin_packing.py | 2 +
python/iceberg/hive/__init__.py | 25 +
python/iceberg/hive/hive_table_operations.py | 59 +
python/iceberg/hive/hive_tables.py | 57 +
python/iceberg/spark/__init__.py | 19 -
python/iceberg/spark/source/__init__.py | 19 -
python/iceberg/spark/source/spark_catalog.py | 53 -
python/iceberg/spark/table_identifier.py | 21 -
python/setup.py | 15 +-
python/tests/__init__.py | 1 -
python/tests/api/__init__.py | 1 -
.../test_inclusive_manifest_evaluator.py | 16 +
python/tests/api/expressions/test_str_to_expr.py | 143 +
python/tests/api/test_file_format.py | 43 +
python/tests/api/test_helpers.py | 27 +-
python/tests/api/transforms/__init__.py | 1 -
python/tests/api/types/__init__.py | 1 -
python/tests/core/__init__.py | 1 -
python/tests/core/avro/__init__.py | 1 -
python/tests/core/avro/conftest.py | 64 +
python/tests/core/avro/test_avro.py | 15 +
python/tests/core/avro/test_read_projection.py | 27 +
python/tests/core/conftest.py | 52 +-
python/tests/core/test_base_table_scan.py | 38 +
python/tests/core/test_snapshot_json.py | 34 +-
python/tests/core/test_table_metadata_parser.py | 7 +-
python/tests/core/utils/__init__.py | 14 +
python/tests/core/utils/test_bin_packing.py | 2 +-
python/tests/hive/__init__.py | 18 +
python/tests/hive/conftest.py | 18 +
python/tests/hive/test_hive_tables.py | 120 +
python/tox.ini | 22 +-
settings.gradle | 4 +-
site/docs/Concurrency in Iceberg.pdf | Bin 0 -> 632605 bytes
site/docs/api-quickstart.md | 32 +-
site/docs/api.md | 17 +
site/docs/configuration.md | 33 +-
site/docs/custom-catalog.md | 152 +
site/docs/disclaimer.md | 5 +
site/docs/evolution.md | 17 +
site/docs/getting-started.md | 17 +
site/docs/how-to-release.md | 161 +
.../javadoc/0.7.0-incubating/allclasses-frame.html | 352 ++
.../0.7.0-incubating/allclasses-noframe.html | 352 ++
.../javadoc/0.7.0-incubating/constant-values.html | 787 +++
.../javadoc/0.7.0-incubating/deprecated-list.html | 122 +
site/docs/javadoc/0.7.0-incubating/help-doc.html | 223 +
site/docs/javadoc/0.7.0-incubating/index-all.html | 5755 ++++++++++++++++++++
site/docs/javadoc/0.7.0-incubating/index.html | 75 +
.../org/apache/iceberg/Accessor.html | 239 +
.../org/apache/iceberg/Accessors.html | 235 +
.../org/apache/iceberg/AppendFiles.html | 279 +
.../org/apache/iceberg/BaseCombinedScanTask.html | 331 ++
.../org/apache/iceberg/BaseMetastoreCatalog.html | 463 ++
.../iceberg/BaseMetastoreTableOperations.html | 649 +++
.../org/apache/iceberg/BaseOverwriteFiles.html | 893 +++
.../org/apache/iceberg/BaseReplacePartitions.html | 756 +++
.../org/apache/iceberg/BaseRewriteManifests.html | 581 ++
.../org/apache/iceberg/BaseTable.html | 926 ++++
.../org/apache/iceberg/CombinedScanTask.html | 262 +
.../org/apache/iceberg/DataFile.html | 503 ++
.../org/apache/iceberg/DataFiles.Builder.html | 493 ++
.../org/apache/iceberg/DataFiles.html | 401 ++
.../iceberg/DataFilesTable.FilesTableScan.html | 707 +++
.../org/apache/iceberg/DataFilesTable.html | 896 +++
.../org/apache/iceberg/DataOperations.html | 317 ++
.../org/apache/iceberg/DataTableScan.html | 770 +++
.../org/apache/iceberg/DataTask.html | 281 +
.../org/apache/iceberg/DeleteFiles.html | 314 ++
.../org/apache/iceberg/ExpireSnapshots.html | 302 +
.../org/apache/iceberg/FileFormat.html | 412 ++
.../org/apache/iceberg/FileHistory.Builder.html | 338 ++
.../org/apache/iceberg/FileHistory.html | 254 +
.../org/apache/iceberg/FileScanTask.html | 387 ++
.../0.7.0-incubating/org/apache/iceberg/Files.html | 274 +
.../org/apache/iceberg/Filterable.html | 455 ++
.../org/apache/iceberg/FilteredManifest.html | 448 ++
.../org/apache/iceberg/FindFiles.Builder.html | 467 ++
.../org/apache/iceberg/FindFiles.html | 254 +
.../org/apache/iceberg/GenericManifestFile.html | 690 +++
.../iceberg/GenericPartitionFieldSummary.html | 514 ++
.../org/apache/iceberg/HasTableOperations.html | 224 +
.../org/apache/iceberg/HistoryEntry.html | 248 +
.../org/apache/iceberg/HistoryTable.html | 879 +++
.../org/apache/iceberg/LocationProviders.html | 237 +
.../org/apache/iceberg/ManifestEntriesTable.html | 880 +++
.../ManifestFile.PartitionFieldSummary.html | 336 ++
.../org/apache/iceberg/ManifestFile.html | 506 ++
.../org/apache/iceberg/ManifestReader.html | 564 ++
.../org/apache/iceberg/ManifestWriter.html | 418 ++
.../org/apache/iceberg/ManifestsTable.html | 890 +++
.../org/apache/iceberg/MetadataTableType.html | 388 ++
.../org/apache/iceberg/Metrics.html | 383 ++
.../org/apache/iceberg/MetricsConfig.html | 261 +
.../org/apache/iceberg/MetricsModes.Counts.html | 296 +
.../org/apache/iceberg/MetricsModes.Full.html | 296 +
.../apache/iceberg/MetricsModes.MetricsMode.html | 173 +
.../org/apache/iceberg/MetricsModes.None.html | 296 +
.../org/apache/iceberg/MetricsModes.Truncate.html | 309 ++
.../org/apache/iceberg/MetricsModes.html | 280 +
.../org/apache/iceberg/OverwriteFiles.html | 380 ++
.../org/apache/iceberg/PartitionField.html | 334 ++
.../org/apache/iceberg/PartitionSpec.Builder.html | 444 ++
.../org/apache/iceberg/PartitionSpec.html | 505 ++
.../org/apache/iceberg/PartitionSpecParser.html | 300 +
.../org/apache/iceberg/PendingUpdate.html | 269 +
.../org/apache/iceberg/ReplacePartitions.html | 284 +
.../org/apache/iceberg/RewriteFiles.html | 259 +
.../org/apache/iceberg/RewriteManifests.html | 287 +
.../org/apache/iceberg/Rollback.html | 273 +
.../org/apache/iceberg/ScanSummary.Builder.html | 358 ++
.../iceberg/ScanSummary.PartitionMetrics.html | 329 ++
.../org/apache/iceberg/ScanSummary.html | 267 +
.../org/apache/iceberg/ScanTask.html | 311 ++
.../org/apache/iceberg/Schema.html | 662 +++
.../org/apache/iceberg/SchemaParser.html | 296 +
.../org/apache/iceberg/SetLocation.html | 336 ++
.../org/apache/iceberg/Snapshot.html | 404 ++
.../org/apache/iceberg/SnapshotParser.html | 250 +
.../apache/iceberg/SnapshotSummary.Builder.html | 357 ++
.../org/apache/iceberg/SnapshotSummary.html | 413 ++
.../org/apache/iceberg/SnapshotUpdate.html | 299 +
.../org/apache/iceberg/SnapshotsTable.html | 879 +++
.../org/apache/iceberg/StructLike.html | 262 +
.../org/apache/iceberg/SystemProperties.html | 267 +
.../0.7.0-incubating/org/apache/iceberg/Table.html | 757 +++
.../iceberg/TableMetadata.SnapshotLogEntry.html | 320 ++
.../org/apache/iceberg/TableMetadata.html | 712 +++
.../apache/iceberg/TableMetadataParser.Codec.html | 369 ++
.../org/apache/iceberg/TableMetadataParser.html | 355 ++
.../org/apache/iceberg/TableOperations.html | 403 ++
.../org/apache/iceberg/TableProperties.html | 1057 ++++
.../org/apache/iceberg/TableScan.html | 587 ++
.../org/apache/iceberg/Tables.html | 278 +
.../org/apache/iceberg/Transaction.html | 480 ++
.../org/apache/iceberg/Transactions.html | 280 +
.../org/apache/iceberg/UpdateLocation.html | 245 +
.../org/apache/iceberg/UpdateProperties.html | 297 +
.../org/apache/iceberg/UpdateSchema.html | 543 ++
.../org/apache/iceberg/avro/Avro.ReadBuilder.html | 331 ++
.../org/apache/iceberg/avro/Avro.WriteBuilder.html | 378 ++
.../org/apache/iceberg/avro/Avro.html | 271 +
.../org/apache/iceberg/avro/AvroIterable.html | 305 ++
.../org/apache/iceberg/avro/AvroSchemaUtil.html | 545 ++
.../org/apache/iceberg/avro/AvroSchemaVisitor.html | 350 ++
.../org/apache/iceberg/avro/LogicalMap.html | 266 +
.../apache/iceberg/avro/ProjectionDatumReader.html | 306 ++
.../org/apache/iceberg/avro/UUIDConversion.html | 344 ++
.../org/apache/iceberg/avro/ValueReader.html | 230 +
.../iceberg/avro/ValueReaders.StructReader.html | 351 ++
.../org/apache/iceberg/avro/ValueReaders.html | 513 ++
.../org/apache/iceberg/avro/ValueWriter.html | 232 +
.../iceberg/avro/ValueWriters.StructWriter.html | 321 ++
.../org/apache/iceberg/avro/ValueWriters.html | 496 ++
.../org/apache/iceberg/avro/package-frame.html | 37 +
.../org/apache/iceberg/avro/package-summary.html | 207 +
.../org/apache/iceberg/avro/package-tree.html | 164 +
.../org/apache/iceberg/catalog/Catalog.html | 721 +++
.../org/apache/iceberg/catalog/Namespace.html | 305 ++
.../apache/iceberg/catalog/TableIdentifier.html | 335 ++
.../org/apache/iceberg/catalog/package-frame.html | 25 +
.../apache/iceberg/catalog/package-summary.html | 165 +
.../org/apache/iceberg/catalog/package-tree.html | 140 +
.../apache/iceberg/common/DynClasses.Builder.html | 381 ++
.../org/apache/iceberg/common/DynClasses.html | 254 +
.../iceberg/common/DynConstructors.Builder.html | 400 ++
.../iceberg/common/DynConstructors.Ctor.html | 384 ++
.../org/apache/iceberg/common/DynConstructors.html | 272 +
.../iceberg/common/DynFields.BoundField.html | 254 +
.../apache/iceberg/common/DynFields.Builder.html | 587 ++
.../iceberg/common/DynFields.StaticField.html | 254 +
.../iceberg/common/DynFields.UnboundField.html | 358 ++
.../org/apache/iceberg/common/DynFields.html | 268 +
.../iceberg/common/DynMethods.BoundMethod.html | 257 +
.../apache/iceberg/common/DynMethods.Builder.html | 737 +++
.../iceberg/common/DynMethods.StaticMethod.html | 257 +
.../iceberg/common/DynMethods.UnboundMethod.html | 367 ++
.../org/apache/iceberg/common/DynMethods.html | 278 +
.../org/apache/iceberg/common/package-frame.html | 34 +
.../org/apache/iceberg/common/package-summary.html | 204 +
.../org/apache/iceberg/common/package-tree.html | 152 +
.../org/apache/iceberg/data/GenericRecord.html | 465 ++
.../iceberg/data/IcebergGenerics.ScanBuilder.html | 325 ++
.../org/apache/iceberg/data/IcebergGenerics.html | 263 +
.../org/apache/iceberg/data/Record.html | 302 +
.../org/apache/iceberg/data/avro/DataReader.html | 281 +
.../org/apache/iceberg/data/avro/DataWriter.html | 281 +
.../apache/iceberg/data/avro/IcebergDecoder.html | 381 ++
.../apache/iceberg/data/avro/IcebergEncoder.html | 351 ++
.../apache/iceberg/data/avro/package-frame.html | 23 +
.../apache/iceberg/data/avro/package-summary.html | 152 +
.../org/apache/iceberg/data/avro/package-tree.html | 142 +
.../org/apache/iceberg/data/package-frame.html | 26 +
.../org/apache/iceberg/data/package-summary.html | 163 +
.../org/apache/iceberg/data/package-tree.html | 145 +
.../data/parquet/GenericParquetReaders.html | 237 +
.../iceberg/data/parquet/GenericParquetWriter.html | 235 +
.../apache/iceberg/data/parquet/package-frame.html | 21 +
.../iceberg/data/parquet/package-summary.html | 144 +
.../apache/iceberg/data/parquet/package-tree.html | 136 +
.../apache/iceberg/encryption/EncryptedFiles.html | 312 ++
.../iceberg/encryption/EncryptedInputFile.html | 245 +
.../iceberg/encryption/EncryptedOutputFile.html | 246 +
.../iceberg/encryption/EncryptionKeyMetadata.html | 288 +
.../iceberg/encryption/EncryptionKeyMetadatas.html | 248 +
.../iceberg/encryption/EncryptionManager.html | 307 ++
.../encryption/PlaintextEncryptionManager.html | 326 ++
.../apache/iceberg/encryption/package-frame.html | 29 +
.../apache/iceberg/encryption/package-summary.html | 186 +
.../apache/iceberg/encryption/package-tree.html | 148 +
.../org/apache/iceberg/events/Listener.html | 222 +
.../org/apache/iceberg/events/Listeners.html | 253 +
.../org/apache/iceberg/events/ScanEvent.html | 315 ++
.../org/apache/iceberg/events/package-frame.html | 25 +
.../org/apache/iceberg/events/package-summary.html | 165 +
.../org/apache/iceberg/events/package-tree.html | 140 +
.../iceberg/exceptions/AlreadyExistsException.html | 284 +
.../iceberg/exceptions/CommitFailedException.html | 284 +
.../iceberg/exceptions/NoSuchTableException.html | 284 +
.../iceberg/exceptions/NotFoundException.html | 284 +
.../iceberg/exceptions/RuntimeIOException.html | 296 +
.../iceberg/exceptions/ValidationException.html | 325 ++
.../apache/iceberg/exceptions/package-frame.html | 25 +
.../apache/iceberg/exceptions/package-summary.html | 172 +
.../apache/iceberg/exceptions/package-tree.html | 152 +
.../org/apache/iceberg/expressions/And.html | 327 ++
.../org/apache/iceberg/expressions/Binder.html | 282 +
.../apache/iceberg/expressions/BoundPredicate.html | 274 +
.../apache/iceberg/expressions/BoundReference.html | 300 +
.../org/apache/iceberg/expressions/Evaluator.html | 301 +
.../iceberg/expressions/Expression.Operation.html | 545 ++
.../org/apache/iceberg/expressions/Expression.html | 269 +
.../ExpressionVisitors.BoundExpressionVisitor.html | 467 ++
.../ExpressionVisitors.ExpressionVisitor.html | 365 ++
.../iceberg/expressions/ExpressionVisitors.html | 305 ++
.../apache/iceberg/expressions/Expressions.html | 516 ++
.../org/apache/iceberg/expressions/False.html | 302 +
.../expressions/InclusiveMetricsEvaluator.html | 303 ++
.../org/apache/iceberg/expressions/Literal.html | 441 ++
.../iceberg/expressions/ManifestEvaluator.html | 286 +
.../apache/iceberg/expressions/NamedReference.html | 261 +
.../org/apache/iceberg/expressions/Not.html | 314 ++
.../org/apache/iceberg/expressions/Or.html | 327 ++
.../org/apache/iceberg/expressions/Predicate.html | 319 ++
.../Projections.ProjectionEvaluator.html | 304 ++
.../apache/iceberg/expressions/Projections.html | 383 ++
.../org/apache/iceberg/expressions/Reference.html | 180 +
.../iceberg/expressions/ResidualEvaluator.html | 320 ++
.../expressions/StrictMetricsEvaluator.html | 288 +
.../org/apache/iceberg/expressions/True.html | 302 +
.../iceberg/expressions/UnboundPredicate.html | 301 +
.../apache/iceberg/expressions/package-frame.html | 51 +
.../iceberg/expressions/package-summary.html | 295 +
.../apache/iceberg/expressions/package-tree.html | 184 +
.../org/apache/iceberg/hadoop/HadoopFileIO.html | 335 ++
.../org/apache/iceberg/hadoop/HadoopInputFile.html | 440 ++
.../apache/iceberg/hadoop/HadoopOutputFile.html | 382 ++
.../iceberg/hadoop/HadoopTableOperations.html | 436 ++
.../org/apache/iceberg/hadoop/HadoopTables.html | 375 ++
.../iceberg/hadoop/SerializableConfiguration.html | 279 +
.../org/apache/iceberg/hadoop/package-frame.html | 25 +
.../org/apache/iceberg/hadoop/package-summary.html | 171 +
.../org/apache/iceberg/hadoop/package-tree.html | 140 +
.../org/apache/iceberg/hive/ClientPool.Action.html | 230 +
.../org/apache/iceberg/hive/ClientPool.html | 332 ++
.../org/apache/iceberg/hive/HiveCatalog.html | 475 ++
.../org/apache/iceberg/hive/HiveCatalogs.html | 235 +
.../org/apache/iceberg/hive/HiveClientPool.html | 340 ++
.../apache/iceberg/hive/HiveTableOperations.html | 357 ++
.../org/apache/iceberg/hive/HiveTypeConverter.html | 235 +
.../apache/iceberg/hive/RuntimeMetaException.html | 298 +
.../org/apache/iceberg/hive/package-frame.html | 33 +
.../org/apache/iceberg/hive/package-summary.html | 195 +
.../org/apache/iceberg/hive/package-tree.html | 168 +
.../org/apache/iceberg/io/CloseableGroup.html | 300 +
.../CloseableIterable.ConcatCloseableIterable.html | 296 +
.../org/apache/iceberg/io/CloseableIterable.html | 351 ++
.../apache/iceberg/io/DelegatingInputStream.html | 219 +
.../apache/iceberg/io/DelegatingOutputStream.html | 219 +
.../org/apache/iceberg/io/FileAppender.html | 317 ++
.../org/apache/iceberg/io/FileIO.html | 300 +
.../org/apache/iceberg/io/InputFile.html | 295 +
.../org/apache/iceberg/io/LocationProvider.html | 264 +
.../org/apache/iceberg/io/OutputFile.html | 302 +
.../apache/iceberg/io/PositionOutputStream.html | 295 +
.../org/apache/iceberg/io/SeekableInputStream.html | 322 ++
.../org/apache/iceberg/io/package-frame.html | 34 +
.../org/apache/iceberg/io/package-summary.html | 206 +
.../org/apache/iceberg/io/package-tree.html | 177 +
.../org/apache/iceberg/mapping/MappedField.html | 377 ++
.../org/apache/iceberg/mapping/MappedFields.html | 351 ++
.../org/apache/iceberg/mapping/MappingUtil.html | 274 +
.../org/apache/iceberg/mapping/NameMapping.html | 331 ++
.../apache/iceberg/mapping/NameMappingParser.html | 257 +
.../org/apache/iceberg/mapping/package-frame.html | 24 +
.../apache/iceberg/mapping/package-summary.html | 162 +
.../org/apache/iceberg/mapping/package-tree.html | 139 +
.../org/apache/iceberg/orc/ColumnIdMap.html | 521 ++
.../org/apache/iceberg/orc/ORC.ReadBuilder.html | 318 ++
.../org/apache/iceberg/orc/ORC.WriteBuilder.html | 334 ++
.../org/apache/iceberg/orc/ORC.html | 271 +
.../org/apache/iceberg/orc/OrcMetrics.html | 250 +
.../org/apache/iceberg/orc/OrcValueReader.html | 229 +
.../org/apache/iceberg/orc/OrcValueWriter.html | 239 +
.../org/apache/iceberg/orc/TypeConversion.html | 306 ++
.../iceberg/orc/VectorizedRowBatchIterator.html | 293 +
.../org/apache/iceberg/orc/package-frame.html | 31 +
.../org/apache/iceberg/orc/package-summary.html | 191 +
.../org/apache/iceberg/orc/package-tree.html | 146 +
.../org/apache/iceberg/package-frame.html | 115 +
.../org/apache/iceberg/package-summary.html | 621 +++
.../org/apache/iceberg/package-tree.html | 279 +
.../org/apache/iceberg/parquet/ColumnIterator.html | 453 ++
.../org/apache/iceberg/parquet/ColumnWriter.html | 429 ++
.../iceberg/parquet/Parquet.ReadBuilder.html | 396 ++
.../iceberg/parquet/Parquet.WriteBuilder.html | 391 ++
.../org/apache/iceberg/parquet/Parquet.html | 271 +
.../apache/iceberg/parquet/ParquetAvroReader.html | 235 +
.../ParquetAvroValueReaders.TimeMillisReader.html | 286 +
...quetAvroValueReaders.TimestampMillisReader.html | 286 +
.../iceberg/parquet/ParquetAvroValueReaders.html | 260 +
.../apache/iceberg/parquet/ParquetAvroWriter.html | 235 +
.../parquet/ParquetDictionaryRowGroupFilter.html | 301 +
.../apache/iceberg/parquet/ParquetIterable.html | 292 +
.../parquet/ParquetMetricsRowGroupFilter.html | 299 +
.../org/apache/iceberg/parquet/ParquetReader.html | 338 ++
.../apache/iceberg/parquet/ParquetSchemaUtil.html | 355 ++
.../apache/iceberg/parquet/ParquetTypeVisitor.html | 386 ++
.../org/apache/iceberg/parquet/ParquetUtil.html | 270 +
.../apache/iceberg/parquet/ParquetValueReader.html | 264 +
.../ParquetValueReaders.BinaryAsDecimalReader.html | 306 ++
.../parquet/ParquetValueReaders.BytesReader.html | 304 ++
.../ParquetValueReaders.FloatAsDoubleReader.html | 339 ++
.../ParquetValueReaders.IntAsLongReader.html | 339 ++
...ParquetValueReaders.IntegerAsDecimalReader.html | 306 ++
.../parquet/ParquetValueReaders.ListReader.html | 352 ++
.../ParquetValueReaders.LongAsDecimalReader.html | 306 ++
.../parquet/ParquetValueReaders.MapReader.html | 356 ++
.../ParquetValueReaders.PrimitiveReader.html | 365 ++
...ParquetValueReaders.RepeatedKeyValueReader.html | 411 ++
.../ParquetValueReaders.RepeatedReader.html | 407 ++
.../parquet/ParquetValueReaders.ReusableEntry.html | 342 ++
.../parquet/ParquetValueReaders.StringReader.html | 304 ++
.../parquet/ParquetValueReaders.StructReader.html | 528 ++
.../parquet/ParquetValueReaders.UnboxedReader.html | 388 ++
.../iceberg/parquet/ParquetValueReaders.html | 342 ++
.../apache/iceberg/parquet/ParquetValueWriter.html | 253 +
.../ParquetValueWriters.PrimitiveWriter.html | 358 ++
...ParquetValueWriters.RepeatedKeyValueWriter.html | 343 ++
.../ParquetValueWriters.RepeatedWriter.html | 341 ++
.../parquet/ParquetValueWriters.StructWriter.html | 339 ++
.../iceberg/parquet/ParquetValueWriters.html | 396 ++
.../iceberg/parquet/ParquetWriteAdapter.html | 371 ++
.../org/apache/iceberg/parquet/TripleWriter.html | 396 ++
.../apache/iceberg/parquet/TypeToMessageType.html | 417 ++
.../iceberg/parquet/TypeWithSchemaVisitor.html | 403 ++
.../org/apache/iceberg/parquet/package-frame.html | 66 +
.../apache/iceberg/parquet/package-summary.html | 325 ++
.../org/apache/iceberg/parquet/package-tree.html | 197 +
.../IcebergPigInputFormat.IcebergRecordReader.html | 383 ++
.../apache/iceberg/pig/IcebergPigInputFormat.html | 285 +
.../org/apache/iceberg/pig/IcebergStorage.html | 632 +++
.../org/apache/iceberg/pig/PigParquetReader.html | 239 +
.../org/apache/iceberg/pig/SchemaUtil.html | 289 +
.../org/apache/iceberg/pig/package-frame.html | 23 +
.../org/apache/iceberg/pig/package-summary.html | 152 +
.../org/apache/iceberg/pig/package-tree.html | 151 +
.../iceberg/spark/PruneColumnsWithReordering.html | 341 ++
.../spark/PruneColumnsWithoutReordering.html | 341 ++
.../org/apache/iceberg/spark/SparkFilters.html | 235 +
.../org/apache/iceberg/spark/SparkSchemaUtil.html | 528 ++
.../apache/iceberg/spark/data/SparkAvroReader.html | 300 +
.../apache/iceberg/spark/data/SparkAvroWriter.html | 300 +
.../apache/iceberg/spark/data/SparkOrcReader.html | 288 +
.../apache/iceberg/spark/data/SparkOrcWriter.html | 289 +
.../iceberg/spark/data/SparkParquetReaders.html | 237 +
.../iceberg/spark/data/SparkParquetWriters.html | 237 +
.../iceberg/spark/data/SparkValueReaders.html | 195 +
.../iceberg/spark/data/SparkValueWriters.html | 195 +
.../apache/iceberg/spark/data/package-frame.html | 27 +
.../apache/iceberg/spark/data/package-summary.html | 174 +
.../apache/iceberg/spark/data/package-tree.html | 142 +
.../org/apache/iceberg/spark/hacks/Hive.html | 254 +
.../apache/iceberg/spark/hacks/package-frame.html | 20 +
.../iceberg/spark/hacks/package-summary.html | 140 +
.../apache/iceberg/spark/hacks/package-tree.html | 135 +
.../org/apache/iceberg/spark/package-frame.html | 23 +
.../org/apache/iceberg/spark/package-summary.html | 154 +
.../org/apache/iceberg/spark/package-tree.html | 142 +
.../apache/iceberg/spark/source/IcebergSource.html | 363 ++
.../iceberg/spark/source/StreamingWriter.html | 390 ++
.../apache/iceberg/spark/source/package-frame.html | 21 +
.../iceberg/spark/source/package-summary.html | 144 +
.../apache/iceberg/spark/source/package-tree.html | 136 +
.../iceberg/transforms/PartitionSpecVisitor.html | 332 ++
.../org/apache/iceberg/transforms/Transform.html | 376 ++
.../org/apache/iceberg/transforms/Transforms.html | 419 ++
.../iceberg/transforms/UnknownTransform.html | 430 ++
.../apache/iceberg/transforms/package-frame.html | 26 +
.../apache/iceberg/transforms/package-summary.html | 167 +
.../apache/iceberg/transforms/package-tree.html | 145 +
.../apache/iceberg/types/CheckCompatibility.html | 391 ++
.../org/apache/iceberg/types/Comparators.html | 300 +
.../org/apache/iceberg/types/Conversions.html | 282 +
.../org/apache/iceberg/types/IndexByName.html | 365 ++
.../org/apache/iceberg/types/Type.NestedType.html | 368 ++
.../apache/iceberg/types/Type.PrimitiveType.html | 329 ++
.../org/apache/iceberg/types/Type.TypeID.html | 524 ++
.../org/apache/iceberg/types/Type.html | 385 ++
.../types/TypeUtil.CustomOrderSchemaVisitor.html | 354 ++
.../org/apache/iceberg/types/TypeUtil.NextID.html | 224 +
.../iceberg/types/TypeUtil.SchemaVisitor.html | 388 ++
.../org/apache/iceberg/types/TypeUtil.html | 586 ++
.../org/apache/iceberg/types/Types.BinaryType.html | 345 ++
.../apache/iceberg/types/Types.BooleanType.html | 345 ++
.../org/apache/iceberg/types/Types.DateType.html | 345 ++
.../apache/iceberg/types/Types.DecimalType.html | 373 ++
.../org/apache/iceberg/types/Types.DoubleType.html | 345 ++
.../org/apache/iceberg/types/Types.FixedType.html | 358 ++
.../org/apache/iceberg/types/Types.FloatType.html | 345 ++
.../apache/iceberg/types/Types.IntegerType.html | 345 ++
.../org/apache/iceberg/types/Types.ListType.html | 491 ++
.../org/apache/iceberg/types/Types.LongType.html | 345 ++
.../org/apache/iceberg/types/Types.MapType.html | 525 ++
.../apache/iceberg/types/Types.NestedField.html | 436 ++
.../org/apache/iceberg/types/Types.StringType.html | 345 ++
.../org/apache/iceberg/types/Types.StructType.html | 461 ++
.../org/apache/iceberg/types/Types.TimeType.html | 311 ++
.../apache/iceberg/types/Types.TimestampType.html | 371 ++
.../org/apache/iceberg/types/Types.UUIDType.html | 345 ++
.../org/apache/iceberg/types/Types.html | 318 ++
.../org/apache/iceberg/types/package-frame.html | 55 +
.../org/apache/iceberg/types/package-summary.html | 280 +
.../org/apache/iceberg/types/package-tree.html | 194 +
.../apache/iceberg/util/BinPacking.ListPacker.html | 294 +
.../iceberg/util/BinPacking.PackingIterable.html | 297 +
.../org/apache/iceberg/util/BinPacking.html | 258 +
.../org/apache/iceberg/util/BinaryUtil.html | 276 +
.../org/apache/iceberg/util/ByteBuffers.html | 248 +
.../apache/iceberg/util/CharSequenceWrapper.html | 296 +
.../org/apache/iceberg/util/ExceptionUtil.html | 195 +
.../org/apache/iceberg/util/Exceptions.html | 261 +
.../org/apache/iceberg/util/JsonUtil.html | 368 ++
.../org/apache/iceberg/util/ManifestFileUtil.html | 239 +
.../org/apache/iceberg/util/Pair.html | 378 ++
.../org/apache/iceberg/util/ParallelIterable.html | 328 ++
.../org/apache/iceberg/util/PropertyUtil.html | 273 +
.../org/apache/iceberg/util/SnapshotUtil.html | 262 +
.../org/apache/iceberg/util/StructLikeWrapper.html | 296 +
.../org/apache/iceberg/util/Tasks.Builder.html | 520 ++
.../org/apache/iceberg/util/Tasks.FailureTask.html | 232 +
.../org/apache/iceberg/util/Tasks.Task.html | 230 +
.../iceberg/util/Tasks.UnrecoverableException.html | 295 +
.../org/apache/iceberg/util/Tasks.html | 294 +
.../org/apache/iceberg/util/ThreadPools.html | 301 +
.../org/apache/iceberg/util/UnicodeUtil.html | 299 +
.../org/apache/iceberg/util/package-frame.html | 47 +
.../org/apache/iceberg/util/package-summary.html | 250 +
.../org/apache/iceberg/util/package-tree.html | 175 +
.../javadoc/0.7.0-incubating/overview-frame.html | 45 +
.../javadoc/0.7.0-incubating/overview-summary.html | 232 +
.../javadoc/0.7.0-incubating/overview-tree.html | 642 +++
site/docs/javadoc/0.7.0-incubating/package-list | 25 +
site/docs/javadoc/0.7.0-incubating/script.js | 30 +
.../javadoc/0.7.0-incubating/serialized-form.html | 1059 ++++
site/docs/javadoc/0.7.0-incubating/stylesheet.css | 574 ++
site/docs/javadoc/index.html | 4 +-
site/docs/partitioning.md | 18 +-
site/docs/performance.md | 17 +
site/docs/presto.md | 17 +
site/docs/python-api-intro.md | 143 +
site/docs/python-feature-support.md | 72 +
site/docs/python-quickstart.md | 60 +
site/docs/releases.md | 38 +
site/docs/reliability.md | 19 +-
site/docs/schemas.md | 17 +
site/docs/snapshots.md | 17 +
site/docs/spark.md | 45 +-
site/docs/spec.md | 44 +-
site/docs/terms.md | 17 +
site/mkdocs.yml | 10 +-
spark-runtime/LICENSE | 428 ++
spark-runtime/NOTICE | 81 +
.../spark/source/IcebergSourceBenchmark.java | 2 +-
.../VectorizedDictionaryEncodedBenchmark.java | 27 +-
...VectorizedDictionaryEncodedFloatsBenchmark.java | 66 +-
...ctorizedDictionaryEncodedIntegersBenchmark.java | 67 +-
.../VectorizedDictionaryEncodedLongsBenchmark.java | 66 +-
...ectorizedDictionaryEncodedStringsBenchmark.java | 144 +-
...zedFallbackToPlainEncodingStringsBenchmark.java | 105 +-
.../VectorizedIcebergSourceBenchmark.java | 7 +-
.../vectorized/VectorizedReadLongsBenchmark.java | 1 -
.../vectorized/VectorizedReadStringsBenchmark.java | 3 +-
.../iceberg/spark/PruneColumnsWithReordering.java | 2 +-
.../spark/PruneColumnsWithoutReordering.java | 3 +-
.../org/apache/iceberg/spark/SparkFilters.java | 11 +-
.../apache/iceberg/spark/data/SparkAvroReader.java | 2 +
.../apache/iceberg/spark/data/SparkOrcWriter.java | 16 +-
.../iceberg/spark/data/SparkValueReaders.java | 22 +
.../data/vector/VectorizedSparkParquetReaders.java | 235 +-
.../java/org/apache/iceberg/spark/hacks/Hive.java | 65 -
.../apache/iceberg/spark/source/IcebergSource.java | 99 +-
.../apache/iceberg/spark/source/PartitionKey.java | 3 +-
.../org/apache/iceberg/spark/source/Reader.java | 53 +-
.../iceberg/spark/source/StreamingWriter.java | 6 +-
.../iceberg/spark/source/StructInternalRow.java | 26 +-
.../iceberg/spark/source/VectorizedReading.java | 58 +-
.../org/apache/iceberg/spark/source/Writer.java | 299 +-
...org.apache.spark.sql.sources.DataSourceRegister | 16 -
.../org/apache/iceberg/spark/SparkTableUtil.scala | 393 +-
.../apache/iceberg/TestDataFileSerialization.java | 188 +
.../apache/iceberg/spark/data/AvroDataTest.java | 261 +-
.../apache/iceberg/spark/data/DictionaryData.java | 469 +-
.../org/apache/iceberg/spark/data/RandomData.java | 26 +-
.../org/apache/iceberg/spark/data/TestHelpers.java | 8 +-
.../iceberg/spark/data/TestParquetAvroReader.java | 386 +-
.../iceberg/spark/data/TestParquetAvroWriter.java | 3 +-
.../iceberg/spark/data/TestSparkAvroEnums.java | 95 +
...rkParquetDictionaryEncodedVectorizedReader.java | 59 +-
...ackToDictionaryEncodingForVectorizedReader.java | 75 +-
.../data/TestSparkParquetVectorizedReader.java | 12 +-
.../iceberg/spark/data/TestSparkParquetWriter.java | 3 +-
.../iceberg/spark/source/TestDataFrameWrites.java | 131 +-
.../spark/source/TestDataSourceOptions.java | 30 +-
.../iceberg/spark/source/TestFilteredScan.java | 60 +
.../spark/source/TestForwardCompatibility.java | 261 +
.../source/TestIcebergSourceHadoopTables.java | 466 ++
.../spark/source/TestIcebergSourceHiveTables.java | 236 +-
.../iceberg/spark/source/TestParquetWrite.java | 162 +
.../iceberg/spark/source/TestPartitionValues.java | 4 +-
.../iceberg/spark/source/TestSparkSchema.java | 213 +
.../iceberg/spark/source/TestSparkTableUtil.java | 113 +-
.../TestSparkTableUtilWithInMemoryCatalog.java | 247 +
.../spark/source/TestStructuredStreaming.java | 61 +
.../apache/iceberg/spark/source/TestTables.java | 2 +-
.../iceberg/spark/source/ThreeColumnRecord.java | 89 +
tasks.gradle | 45 +
versions.lock | 62 +-
versions.props | 11 +-
883 files changed, 163516 insertions(+), 6617 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5550eda..bc5c82f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,9 +3,32 @@
*.ipr
*.iws
*.iml
+
# gradle build
.gradle
build
+gradle/
out
+gradle/wrapper/gradle-wrapper.jar
+
# web site build
site/site
+
+__pycache__/
+*.py[cod]
+.eggs/
+.tox/
+env/
+venv/
+*.egg-info/
+test-reports
+build/
+dist/
+sdist/
+.coverage
+coverage.xml
+.pytest_cache/
+spark/tmp/
+
+# vscode/eclipse files
+.project
diff --git a/.travis.yml b/.travis.yml
index a90a272..01f12ff 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,18 +17,35 @@
# under the License.
#
-language: java
sudo: false
dist: trusty
-before_cache:
- - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock
- - rm -fr $HOME/.gradle/caches/*/plugin-resolution/
-
-cache:
- directories:
- - $HOME/.gradle/caches/
- - $HOME/.gradle/wrapper/
+matrix:
+ include:
+ - language: java
+ name: java
+ before_cache:
+ - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock
+ - rm -fr $HOME/.gradle/caches/*/plugin-resolution/
+ cache:
+ directories:
+ - $HOME/.gradle/caches/
+ - $HOME/.gradle/wrapper/
+ - language: python
+ python: 3.6
+ install: pip install tox-travis
+ before_script:
+ - cd python
+ script: tox
+ deploy:
+ provider: pypi
+ user: TBD
+ password:
+ secure: TBD
+ distributions: "sdist bdist_wheel"
+ skip_existing: true
+ on:
+ tags: true
git:
depth: false
diff --git a/DISCLAIMER b/DISCLAIMER
new file mode 100644
index 0000000..f33cec3
--- /dev/null
+++ b/DISCLAIMER
@@ -0,0 +1,10 @@
+Apache Iceberg is an effort undergoing incubation at the Apache Software
+Foundation (ASF), sponsored by the Apache Incubator PMC.
+
+Incubation is required of all newly accepted projects until a further review
+indicates that the infrastructure, communications, and decision making process
+have stabilized in a manner consistent with other successful ASF projects.
+
+While incubation status is not necessarily a reflection of the completeness
+or stability of the code, it does indicate that the project has yet to be
+fully endorsed by the ASF.
diff --git a/LICENSE b/LICENSE
index fec6e32..5cf503c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -203,6 +203,16 @@
--------------------------------------------------------------------------------
+This product includes a gradle wrapper.
+
+* gradlew and gradle/wrapper/gradle-wrapper.properties
+
+Copyright: 2010-2019 Gradle Authors.
+Home page: https://github.com/gradle/gradle
+License: http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
This product includes code from Apache Avro.
* Conversion in DecimalWriter is based on Avro's Conversions.DecimalConversion.
diff --git a/NOTICE b/NOTICE
index e81861c..c56a7a2 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,6 +1,6 @@
Apache Iceberg (incubating)
-Copyright 2017-2018 The Apache Software Foundation
+Copyright 2017-2019 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
diff --git a/README.md b/README.md
index 7551daf..a518a2e 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,10 @@ Community discussions happen primarily on the [dev mailing list][dev-list] or on
### Building
-Iceberg is built using Gradle 5.2.1.
+Iceberg is built using Gradle 5.4.1.
+
+* To invoke a build and run tests: `./gradlew build`
+* To skip tests: `./gradlew build -x test`
* To invoke a build and run tests: `./gradlew build`
* To skip tests: `./gradlew build -x test`
@@ -80,7 +83,7 @@ Iceberg's Spark integration is compatible with the following Spark versions:
| --------------- | ------------- |
| 0.2.0+ * | 2.3.0 |
| 0.3.0+ * | 2.3.2 |
-| master branch | 2.4.0 |
+| master branch | 2.4.x |
An asterisk (*) refers to releases under the now deprecated [Netflix/iceberg](https://github.com/Netflix/iceberg) repo.
diff --git a/api/src/main/java/org/apache/iceberg/Files.java b/api/src/main/java/org/apache/iceberg/Files.java
index 4e570ad..e394913 100644
--- a/api/src/main/java/org/apache/iceberg/Files.java
+++ b/api/src/main/java/org/apache/iceberg/Files.java
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.file.Paths;
import org.apache.iceberg.exceptions.AlreadyExistsException;
+import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
@@ -66,7 +67,7 @@ public class Files {
try {
return new PositionFileOutputStream(file, new RandomAccessFile(file, "rw"));
} catch (FileNotFoundException e) {
- throw new RuntimeIOException(e, "Failed to create file: %s", file);
+ throw new NotFoundException(e, "Failed to create file: %s", file);
}
}
@@ -124,7 +125,7 @@ public class Files {
try {
return new SeekableFileInputStream(new RandomAccessFile(file, "r"));
} catch (FileNotFoundException e) {
- throw new RuntimeIOException(e, "Failed to read file: %s", file);
+ throw new NotFoundException(e, "Failed to read file: %s", file);
}
}
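With the two hunks above, a missing local file now surfaces as the newly added NotFoundException instead of the generic RuntimeIOException, so callers can tell "missing" apart from other I/O failures. A sketch of such a caller, assuming a path that may not exist (the path literal is illustrative):

    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.iceberg.Files;
    import org.apache.iceberg.exceptions.NotFoundException;
    import org.apache.iceberg.io.InputFile;

    public class ReadIfPresent {
      public static void main(String[] args) throws IOException {
        InputFile input = Files.localInput("/tmp/maybe-missing.parquet");
        try (InputStream in = input.newStream()) {
          System.out.println("first byte: " + in.read());
        } catch (NotFoundException e) {
          // previously this surfaced as RuntimeIOException
          System.out.println("file does not exist: " + input.location());
        }
      }
    }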
diff --git a/api/src/main/java/org/apache/iceberg/OverwriteFiles.java b/api/src/main/java/org/apache/iceberg/OverwriteFiles.java
index 8af7974..b07701a 100644
--- a/api/src/main/java/org/apache/iceberg/OverwriteFiles.java
+++ b/api/src/main/java/org/apache/iceberg/OverwriteFiles.java
@@ -24,18 +24,21 @@ import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Projections;
/**
- * API for overwriting files in a table by filter expression.
+ * API for overwriting files in a table.
* <p>
* This API accumulates file additions and produces a new {@link Snapshot} of the table by replacing
- * all the files that match the filter expression with the set of additions. This operation is used
- * to implement idempotent writes that always replace a section of a table with new data.
+ * all the deleted files with the set of additions. This operation is used to implement idempotent
+ * writes that always replace a section of a table with new data or update/delete operations that
+ * eagerly overwrite files.
* <p>
- * Overwrites can be validated
+ * Overwrites can be validated. The default validation mode is idempotent, meaning the overwrite is
+ * correct and should be committed regardless of other concurrent changes to the table.
+ * For example, this can be used for replacing all the data for day D with query results.
+ * Alternatively, this API can be configured for overwriting certain files with their filtered
+ * versions while ensuring no new data that would need to be filtered has been added.
* <p>
* When committing, these changes will be applied to the latest table snapshot. Commit conflicts
* will be resolved by applying the changes to the new latest snapshot and reattempting the commit.
- * This has no requirements for the latest snapshot and will not fail based on other snapshot
- * changes.
*/
public interface OverwriteFiles extends SnapshotUpdate<OverwriteFiles> {
/**
@@ -66,6 +69,14 @@ public interface OverwriteFiles extends SnapshotUpdate<OverwriteFiles> {
OverwriteFiles addFile(DataFile file);
/**
+ * Delete a {@link DataFile} from the table.
+ *
+ * @param file a data file
+ * @return this for method chaining
+ */
+ OverwriteFiles deleteFile(DataFile file);
+
+ /**
* Signal that each file added to the table must match the overwrite expression.
* <p>
* If this method is called, each added file is validated on commit to ensure that it matches the
@@ -74,5 +85,23 @@ public interface OverwriteFiles extends SnapshotUpdate<OverwriteFiles> {
*
* @return this for method chaining
*/
- OverwriteFiles validateAddedFiles();
+ OverwriteFiles validateAddedFilesMatchOverwriteFilter();
+
+ /**
+ * Enables validation that files added concurrently do not conflict with this commit's operation.
+ * <p>
+ * This method should be called when the table is queried to determine which files to delete/append.
+ * If a concurrent operation commits a new file after the data was read and that file might
+ * contain rows matching the specified conflict detection filter, the overwrite operation
+ * will detect this during retries and fail.
+ * <p>
+ * Calling this method with a correct conflict detection filter is required to maintain
+ * serializable isolation for eager update/delete operations. Otherwise, the isolation level
+ * will be snapshot isolation.
+ *
+ * @param readSnapshotId the snapshot id that was used to read the data or null if the table was empty
+ * @param conflictDetectionFilter an expression on rows in the table
+ * @return this for method chaining
+ */
+ OverwriteFiles validateNoConflictingAppends(Long readSnapshotId, Expression conflictDetectionFilter);
}
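The reworked OverwriteFiles contract above combines file-level deletes/appends with optional conflict detection. A minimal sketch of an eager rewrite using these methods (the table, oldFile, and newFile handles are assumptions for illustration; Table.newOverwrite(), currentSnapshot(), and Expressions.equal(...) are existing Iceberg APIs):

    import org.apache.iceberg.DataFile;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.expressions.Expressions;

    public class EagerOverwriteSketch {
      // Replace oldFile with its filtered copy; fail on retry if a concurrent
      // commit appended rows matching the conflict detection filter.
      static void rewrite(Table table, DataFile oldFile, DataFile newFile) {
        long readSnapshotId = table.currentSnapshot().snapshotId();
        table.newOverwrite()
            .deleteFile(oldFile)
            .addFile(newFile)
            .validateNoConflictingAppends(
                readSnapshotId, Expressions.equal("day", "2019-11-25"))
            .commit();
      }
    }

Omitting the validateNoConflictingAppends call leaves the commit at snapshot isolation rather than serializable isolation, as the new Javadoc notes.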
diff --git a/api/src/main/java/org/apache/iceberg/PartitionField.java b/api/src/main/java/org/apache/iceberg/PartitionField.java
index ceb4db9..3964207 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionField.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionField.java
@@ -67,8 +67,7 @@ public class PartitionField implements Serializable {
public boolean equals(Object other) {
if (this == other) {
return true;
- }
- if (other == null || getClass() != other.getClass()) {
+ } else if (!(other instanceof PartitionField)) {
return false;
}
diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
index 5013119..7dc070b 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -33,12 +33,13 @@ import java.net.URLEncoder;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.Set;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.transforms.Transforms;
+import org.apache.iceberg.transforms.UnknownTransform;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.StructType;
/**
* Represents how to produce partition data for a table.
@@ -99,16 +100,16 @@ public class PartitionSpec implements Serializable {
}
/**
- * @return a {@link Types.StructType} for partition data defined by this spec.
+ * @return a {@link StructType} for partition data defined by this spec.
*/
- public Types.StructType partitionType() {
+ public StructType partitionType() {
List<Types.NestedField> structFields = Lists.newArrayListWithExpectedSize(fields.length);
for (int i = 0; i < fields.length; i += 1) {
PartitionField field = fields[i];
Type sourceType = schema.findType(field.sourceId());
Type resultType = field.transform().getResultType(sourceType);
- // assign ids for partition fields starting at 100 to leave room for data file's other fields
+ // assign ids for partition fields starting at PARTITION_DATA_ID_START to leave room for data file's other fields
structFields.add(
Types.NestedField.optional(PARTITION_DATA_ID_START + i, field.name(), resultType));
}
@@ -123,9 +124,13 @@ public class PartitionSpec implements Serializable {
Class<?>[] classes = new Class<?>[fields.length];
for (int i = 0; i < fields.length; i += 1) {
PartitionField field = fields[i];
- Type sourceType = schema.findType(field.sourceId());
- Type result = field.transform().getResultType(sourceType);
- classes[i] = result.typeId().javaClass();
+ if (field.transform() instanceof UnknownTransform) {
+ classes[i] = Object.class;
+ } else {
+ Type sourceType = schema.findType(field.sourceId());
+ Type result = field.transform().getResultType(sourceType);
+ classes[i] = result.typeId().javaClass();
+ }
}
this.lazyJavaClasses = classes;
@@ -196,8 +201,7 @@ public class PartitionSpec implements Serializable {
public boolean equals(Object other) {
if (this == other) {
return true;
- }
- if (other == null || getClass() != other.getClass()) {
+ } else if (!(other instanceof PartitionSpec)) {
return false;
}
@@ -210,7 +214,7 @@ public class PartitionSpec implements Serializable {
@Override
public int hashCode() {
- return Objects.hashCode(Arrays.hashCode(fields));
+ return Integer.hashCode(Arrays.hashCode(fields));
}
private List<PartitionField> lazyFieldList() {
@@ -327,6 +331,21 @@ public class PartitionSpec implements Serializable {
}
private void checkAndAddPartitionName(String name) {
+ checkAndAddPartitionName(name, null);
+ }
+
+ private void checkAndAddPartitionName(String name, Integer identitySourceColumnId) {
+ // guard against null/empty names so the empty-or-null check below reports the right error
+ Types.NestedField schemaField = name == null || name.isEmpty() ? null : schema.findField(name);
+ if (identitySourceColumnId != null) {
+ // for identity transform case we allow conflicts between partition and schema field name as
+ // long as they are sourced from the same schema field
+ Preconditions.checkArgument(schemaField == null || schemaField.fieldId() == identitySourceColumnId,
+ "Cannot create identity partition sourced from different field in schema: %s", name);
+ } else {
+ // for all other transforms we don't allow conflicts between partition name and schema field name
+ Preconditions.checkArgument(schemaField == null,
+ "Cannot create partition from name that exists in schema: %s", name);
+ }
Preconditions.checkArgument(name != null && !name.isEmpty(),
"Cannot use empty or null partition name: %s", name);
Preconditions.checkArgument(!partitionNames.contains(name),
@@ -352,82 +371,103 @@ public class PartitionSpec implements Serializable {
return sourceColumn;
}
- public Builder identity(String sourceName) {
- checkAndAddPartitionName(sourceName);
+ Builder identity(String sourceName, String targetName) {
Types.NestedField sourceColumn = findSourceColumn(sourceName);
+ checkAndAddPartitionName(targetName, sourceColumn.fieldId());
fields.add(new PartitionField(
- sourceColumn.fieldId(), sourceName, Transforms.identity(sourceColumn.type())));
+ sourceColumn.fieldId(), targetName, Transforms.identity(sourceColumn.type())));
return this;
}
- public Builder year(String sourceName) {
- String name = sourceName + "_year";
- checkAndAddPartitionName(name);
+ public Builder identity(String sourceName) {
+ return identity(sourceName, sourceName);
+ }
+
+ public Builder year(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.year(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.year(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder month(String sourceName) {
- String name = sourceName + "_month";
- checkAndAddPartitionName(name);
+ public Builder year(String sourceName) {
+ return year(sourceName, sourceName + "_year");
+ }
+
+ public Builder month(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.month(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.month(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder day(String sourceName) {
- String name = sourceName + "_day";
- checkAndAddPartitionName(name);
+ public Builder month(String sourceName) {
+ return month(sourceName, sourceName + "_month");
+ }
+
+ public Builder day(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.day(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.day(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder hour(String sourceName) {
- String name = sourceName + "_hour";
- checkAndAddPartitionName(name);
+ public Builder day(String sourceName) {
+ return day(sourceName, sourceName + "_day");
+ }
+
+ public Builder hour(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.hour(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.hour(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder bucket(String sourceName, int numBuckets) {
- String name = sourceName + "_bucket";
- checkAndAddPartitionName(name);
+ public Builder hour(String sourceName) {
+ return hour(sourceName, sourceName + "_hour");
+ }
+
+ public Builder bucket(String sourceName, int numBuckets, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
fields.add(new PartitionField(
- sourceColumn.fieldId(), name, Transforms.bucket(sourceColumn.type(), numBuckets)));
+ sourceColumn.fieldId(), targetName, Transforms.bucket(sourceColumn.type(), numBuckets)));
return this;
}
- public Builder truncate(String sourceName, int width) {
- String name = sourceName + "_trunc";
- checkAndAddPartitionName(name);
+ public Builder bucket(String sourceName, int numBuckets) {
+ return bucket(sourceName, numBuckets, sourceName + "_bucket");
+ }
+
+ public Builder truncate(String sourceName, int width, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
fields.add(new PartitionField(
- sourceColumn.fieldId(), name, Transforms.truncate(sourceColumn.type(), width)));
+ sourceColumn.fieldId(), targetName, Transforms.truncate(sourceColumn.type(), width)));
return this;
}
- public Builder add(int sourceId, String name, String transform) {
- checkAndAddPartitionName(name);
+ public Builder truncate(String sourceName, int width) {
+ return truncate(sourceName, width, sourceName + "_trunc");
+ }
+
+ Builder add(int sourceId, String name, String transform) {
Types.NestedField column = schema.findField(sourceId);
- Preconditions.checkNotNull(column, "Cannot find source column: %d", sourceId);
- fields.add(new PartitionField(
- sourceId, name, Transforms.fromString(column.type(), transform)));
+ Preconditions.checkNotNull(column, "Cannot find source column: %s", sourceId);
+ checkAndAddPartitionName(name, column.fieldId());
+ fields.add(new PartitionField(sourceId, name, Transforms.fromString(column.type(), transform)));
return this;
}
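The new target-name overloads make the generated partition field names explicit. A sketch (assuming a schema SCHEMA with a timestamp column `ts` and a long column `id`):
    PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
        .day("ts", "ts_date")         // explicit name instead of the default "ts_day"
        .bucket("id", 16, "id_hash")  // explicit name instead of "id_bucket"
        .build();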
@@ -438,9 +478,11 @@ public class PartitionSpec implements Serializable {
}
}
- public static void checkCompatibility(PartitionSpec spec, Schema schema) {
+ static void checkCompatibility(PartitionSpec spec, Schema schema) {
for (PartitionField field : spec.fields) {
Type sourceType = schema.findType(field.sourceId());
+ ValidationException.check(sourceType != null,
+ "Cannot find source column for partition field: %s", field);
ValidationException.check(sourceType.isPrimitiveType(),
"Cannot partition by non-primitive source field: %s", sourceType);
ValidationException.check(
diff --git a/api/src/main/java/org/apache/iceberg/RewriteManifests.java b/api/src/main/java/org/apache/iceberg/RewriteManifests.java
index 1ed1ddf..74032eb 100644
--- a/api/src/main/java/org/apache/iceberg/RewriteManifests.java
+++ b/api/src/main/java/org/apache/iceberg/RewriteManifests.java
@@ -29,6 +29,10 @@ import java.util.function.Predicate;
* described only by the manifest files that were added, and commits that snapshot as the
* current.
* <p>
+ * This API can be used to rewrite matching manifests according to a clustering function as well as
+ * to replace specific manifests. Manifests that are deleted or added directly are ignored during
+ * the rewrite process. The set of active files in the replaced manifests must match the set in the new manifests.
+ * <p>
* When committing, these changes will be applied to the latest table snapshot. Commit conflicts
* will be resolved by applying the changes to the new latest snapshot and reattempting the commit.
*/
@@ -37,7 +41,8 @@ public interface RewriteManifests extends SnapshotUpdate<RewriteManifests> {
* Groups an existing {@link DataFile} by a cluster key produced by a function. The cluster key
* will determine which data file will be associated with a particular manifest. All data files
* with the same cluster key will be written to the same manifest (unless the file is large and
- * split into multiple files).
+ * split into multiple files). Manifests deleted via {@link #deleteManifest(ManifestFile)} or
+ * added via {@link #addManifest(ManifestFile)} are ignored during the rewrite process.
*
* @param func Function used to cluster data files to manifests.
* @return this for method chaining
@@ -54,4 +59,21 @@ public interface RewriteManifests extends SnapshotUpdate<RewriteManifests> {
* @return this for method chaining
*/
RewriteManifests rewriteIf(Predicate<ManifestFile> predicate);
+
+ /**
+ * Deletes a {@link ManifestFile manifest file} from the table.
+ *
+ * @param manifest a manifest to delete
+ * @return this for method chaining
+ */
+ RewriteManifests deleteManifest(ManifestFile manifest);
+
+ /**
+ * Adds a {@link ManifestFile manifest file} to the table. The added manifest cannot contain new
+ * or deleted files.
+ *
+ * @param manifest a manifest to add
+ * @return this for method chaining
+ */
+ RewriteManifests addManifest(ManifestFile manifest);
}
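A sketch combining clustering with direct manifest replacement (`table`, `stale`, and `compacted` are assumed):
    table.rewriteManifests()
        .rewriteIf(manifest -> manifest.length() < 8 * 1024 * 1024)  // only rewrite small manifests
        .clusterBy(file -> file.partition().get(0, Integer.class))   // group by the first partition value
        .deleteManifest(stale)      // replaced directly, ignored by the rewrite
        .addManifest(compacted)     // its active files must match those in `stale`
        .commit();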
diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java
index dbfbe28..35656aa 100644
--- a/api/src/main/java/org/apache/iceberg/Schema.java
+++ b/api/src/main/java/org/apache/iceberg/Schema.java
@@ -34,7 +34,8 @@ import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
-import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.NestedField;
+import org.apache.iceberg.types.Types.StructType;
/**
* The schema of a data table.
@@ -43,27 +44,31 @@ public class Schema implements Serializable {
private static final Joiner NEWLINE = Joiner.on('\n');
private static final String ALL_COLUMNS = "*";
- private final Types.StructType struct;
+ private final StructType struct;
private transient BiMap<String, Integer> aliasToId = null;
- private transient Map<Integer, Types.NestedField> idToField = null;
+ private transient Map<Integer, NestedField> idToField = null;
private transient BiMap<String, Integer> nameToId = null;
private transient BiMap<String, Integer> lowerCaseNameToId = null;
private transient Map<Integer, Accessor<StructLike>> idToAccessor = null;
- public Schema(List<Types.NestedField> columns, Map<String, Integer> aliases) {
- this.struct = Types.StructType.of(columns);
+ public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
+ this.struct = StructType.of(columns);
this.aliasToId = aliases != null ? ImmutableBiMap.copyOf(aliases) : null;
+
+ // validate the schema through IndexByName visitor
+ lazyNameToId();
}
- public Schema(List<Types.NestedField> columns) {
- this.struct = Types.StructType.of(columns);
+ public Schema(List<NestedField> columns) {
+ this.struct = StructType.of(columns);
+ lazyNameToId();
}
- public Schema(Types.NestedField... columns) {
+ public Schema(NestedField... columns) {
this(Arrays.asList(columns));
}
- private Map<Integer, Types.NestedField> lazyIdToField() {
+ private Map<Integer, NestedField> lazyIdToField() {
if (idToField == null) {
this.idToField = TypeUtil.indexById(struct);
}
@@ -104,18 +109,18 @@ public class Schema implements Serializable {
}
/**
- * Returns the underlying {@link Types.StructType struct type} for this schema.
+ * Returns the underlying {@link StructType struct type} for this schema.
*
* @return the StructType version of this schema.
*/
- public Types.StructType asStruct() {
+ public StructType asStruct() {
return struct;
}
/**
- * @return a List of the {@link Types.NestedField columns} in this Schema.
+ * @return a List of the {@link NestedField columns} in this Schema.
*/
- public List<Types.NestedField> columns() {
+ public List<NestedField> columns() {
return struct.fields();
}
@@ -131,7 +136,7 @@ public class Schema implements Serializable {
* @return a Type for the sub-field or null if it is not found
*/
public Type findType(int id) {
- Types.NestedField field = lazyIdToField().get(id);
+ NestedField field = lazyIdToField().get(id);
if (field != null) {
return field.type();
}
@@ -139,24 +144,24 @@ public class Schema implements Serializable {
}
/**
- * Returns the sub-field identified by the field id as a {@link Types.NestedField}.
+ * Returns the sub-field identified by the field id as a {@link NestedField}.
*
* @param id a field id
* @return the sub-field or null if it is not found
*/
- public Types.NestedField findField(int id) {
+ public NestedField findField(int id) {
return lazyIdToField().get(id);
}
/**
- * Returns a sub-field by name as a {@link Types.NestedField}.
+ * Returns a sub-field by name as a {@link NestedField}.
* <p>
* The result may be a top-level or a nested field.
*
* @param name a String name
* @return a Type for the sub-field or null if it is not found
*/
- public Types.NestedField findField(String name) {
+ public NestedField findField(String name) {
Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
Integer id = lazyNameToId().get(name);
if (id != null) {
@@ -166,14 +171,14 @@ public class Schema implements Serializable {
}
/**
- * Returns a sub-field by name as a {@link Types.NestedField}.
+ * Returns a sub-field by name as a {@link NestedField}.
* <p>
* The result may be a top-level or a nested field.
*
* @param name a String name
* @return the sub-field or null if it is not found
*/
- public Types.NestedField caseInsensitiveFindField(String name) {
+ public NestedField caseInsensitiveFindField(String name) {
Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
Integer id = lazyLowerCaseNameToId().get(name.toLowerCase(Locale.ROOT));
if (id != null) {
@@ -306,7 +311,7 @@ public class Schema implements Serializable {
public String toString() {
return String.format("table {\n%s\n}",
NEWLINE.join(struct.fields().stream()
- .map(f -> " " + f + (f.doc() == null ? "" : " COMMENT '" + f.doc() + "'"))
+ .map(f -> " " + f)
.collect(Collectors.toList())));
}
}
diff --git a/api/src/main/java/org/apache/iceberg/Table.java b/api/src/main/java/org/apache/iceberg/Table.java
index db15d0e..63c55e5 100644
--- a/api/src/main/java/org/apache/iceberg/Table.java
+++ b/api/src/main/java/org/apache/iceberg/Table.java
@@ -59,6 +59,13 @@ public interface Table {
PartitionSpec spec();
/**
+ * Return a map of {@link PartitionSpec partition specs} for this table.
+ *
+ * @return this table's partition specs map
+ */
+ Map<Integer, PartitionSpec> specs();
+
+ /**
* Return a map of string properties for this table.
*
* @return this table's properties map
diff --git a/api/src/main/java/org/apache/iceberg/TableScan.java b/api/src/main/java/org/apache/iceberg/TableScan.java
index 8259fd1..5bbcf13 100644
--- a/api/src/main/java/org/apache/iceberg/TableScan.java
+++ b/api/src/main/java/org/apache/iceberg/TableScan.java
@@ -59,6 +59,16 @@ public interface TableScan {
TableScan asOfTime(long timestampMillis);
/**
+ * Create a new {@link TableScan} from this scan's configuration that will override the {@link Table}'s behavior
+ * with the given property and value pair. Unknown properties will be ignored.
+ *
+ * @param property name of the table property to be overridden
+ * @param value value to override with
+ * @return a new scan based on this with overridden behavior
+ */
+ TableScan option(String property, String value);
+
+ /**
* Create a new {@link TableScan} from this with the schema as its projection.
*
* @param schema a projection schema
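A sketch of the per-scan override (the property key shown here is illustrative, not taken from this commit):
    TableScan scan = table.newScan()
        .option("read.split.target-size", String.valueOf(128 * 1024 * 1024))
        .filter(Expressions.equal("day", "2019-11-25"));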
diff --git a/api/src/main/java/org/apache/iceberg/UpdateSchema.java b/api/src/main/java/org/apache/iceberg/UpdateSchema.java
index 7fe6bec..503be92 100644
--- a/api/src/main/java/org/apache/iceberg/UpdateSchema.java
+++ b/api/src/main/java/org/apache/iceberg/UpdateSchema.java
@@ -31,6 +31,22 @@ import org.apache.iceberg.types.Type;
public interface UpdateSchema extends PendingUpdate<Schema> {
/**
+ * Allow incompatible changes to the schema.
+ * <p>
+ * Incompatible changes can cause failures when attempting to read older data files. For example, adding a required
+ * column and attempting to read data files without that column will cause a failure. However, if all existing data
+ * files are compatible with the change, it can be safely allowed.
+ * <p>
+ * This option allows incompatible changes to be made to a schema. This should be used when the caller has validated
+ * that the change will not break reads. For example, if a column is added as optional but always populated and data older
+ * than the column addition has been deleted from the table, this can be used with {@link #requireColumn(String)} to
+ * mark the column required.
+ *
+ * @return this for method chaining
+ */
+ UpdateSchema allowIncompatibleChanges();
+
+ /**
* Add a new top-level column.
* <p>
* Because "." may be interpreted as a column path separator or may be used in field names, it is
@@ -113,6 +129,100 @@ public interface UpdateSchema extends PendingUpdate<Schema> {
UpdateSchema addColumn(String parent, String name, Type type, String doc);
/**
+ * Add a new required top-level column.
+ * <p>
+ * This is an incompatible change that can break reading older data. This method will result in an exception unless
+ * {@link #allowIncompatibleChanges()} has been called.
+ * <p>
+ * Because "." may be interpreted as a column path separator or may be used in field names, it is
+ * not allowed in names passed to this method. To add to nested structures or to add fields with
+ * names that contain ".", use {@link #addRequiredColumn(String, String, Type)}.
+ * <p>
+ * If type is a nested type, its field IDs are reassigned when added to the existing schema.
+ *
+ * @param name name for the new column
+ * @param type type for the new column
+ * @return this for method chaining
+ * @throws IllegalArgumentException If name contains "."
+ */
+ default UpdateSchema addRequiredColumn(String name, Type type) {
+ return addRequiredColumn(name, type, null);
+ }
+
+ /**
+ * Add a new required top-level column.
+ * <p>
+ * This is an incompatible change that can break reading older data. This method will result in an exception unless
+ * {@link #allowIncompatibleChanges()} has been called.
+ * <p>
+ * Because "." may be interpreted as a column path separator or may be used in field names, it is
+ * not allowed in names passed to this method. To add to nested structures or to add fields with
+ * names that contain ".", use {@link #addRequiredColumn(String, String, Type)}.
+ * <p>
+ * If type is a nested type, its field IDs are reassigned when added to the existing schema.
+ *
+ * @param name name for the new column
+ * @param type type for the new column
+ * @param doc documentation string for the new column
+ * @return this for method chaining
+ * @throws IllegalArgumentException If name contains "."
+ */
+ UpdateSchema addRequiredColumn(String name, Type type, String doc);
+
+ /**
+ * Add a new required column to a nested struct.
+ * <p>
+ * This is an incompatible change that can break reading older data. This method will result in an exception unless
+ * {@link #allowIncompatibleChanges()} has been called.
+ * <p>
+ * The parent name is used to find the parent using {@link Schema#findField(String)}. If the
+ * parent name is null, the new column will be added to the root as a top-level column. If parent
+ * identifies a struct, a new column is added to that struct. If it identifies a list, the column
+ * is added to the list element struct, and if it identifies a map, the new column is added to
+ * the map's value struct.
+ * <p>
+ * The given name is used to name the new column and names containing "." are not handled
+ * differently.
+ * <p>
+ * If type is a nested type, its field IDs are reassigned when added to the existing schema.
+ *
+ * @param parent name of the parent struct the column will be added to
+ * @param name name for the new column
+ * @param type type for the new column
+ * @return this for method chaining
+ * @throws IllegalArgumentException If parent doesn't identify a struct
+ */
+ default UpdateSchema addRequiredColumn(String parent, String name, Type type) {
+ return addRequiredColumn(parent, name, type, null);
+ }
+
+ /**
+ * Add a new required column to a nested struct.
+ * <p>
+ * This is an incompatible change that can break reading older data. This method will result in an exception unless
+ * {@link #allowIncompatibleChanges()} has been called.
+ * <p>
+ * The parent name is used to find the parent using {@link Schema#findField(String)}. If the
+ * parent name is null, the new column will be added to the root as a top-level column. If parent
+ * identifies a struct, a new column is added to that struct. If it identifies a list, the column
+ * is added to the list element struct, and if it identifies a map, the new column is added to
+ * the map's value struct.
+ * <p>
+ * The given name is used to name the new column and names containing "." are not handled
+ * differently.
+ * <p>
+ * If type is a nested type, its field IDs are reassigned when added to the existing schema.
+ *
+ * @param parent name of the parent struct the column will be added to
+ * @param name name for the new column
+ * @param type type for the new column
+ * @param doc documentation string for the new column
+ * @return this for method chaining
+ * @throws IllegalArgumentException If parent doesn't identify a struct
+ */
+ UpdateSchema addRequiredColumn(String parent, String name, Type type, String doc);
+
+ /**
* Rename a column in the schema.
* <p>
* The name is used to find the column to rename using {@link Schema#findField(String)}.
@@ -185,6 +295,25 @@ public interface UpdateSchema extends PendingUpdate<Schema> {
UpdateSchema updateColumnDoc(String name, String newDoc);
/**
+ * Update a column to optional.
+ *
+ * @param name name of the column to mark optional
+ * @return this for method chaining
+ */
+ UpdateSchema makeColumnOptional(String name);
+
+ /**
+ * Update a column to required.
+ * <p>
+ * This is an incompatible change that can break reading older data. This method will result in an exception unless
+ * {@link #allowIncompatibleChanges()} has been called.
+ *
+ * @param name name of the column to mark required
+ * @return this for method chaining
+ */
+ UpdateSchema requireColumn(String name);
+
+ /**
* Delete a column in the schema.
* <p>
* The name is used to find the column to delete using {@link Schema#findField(String)}.
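A sketch of promoting a fully backfilled optional column to required, which only commits because incompatible changes are explicitly allowed (`table` and the column name are assumed):
    table.updateSchema()
        .allowIncompatibleChanges()   // opt in to the incompatible change
        .requireColumn("event_id")    // assumed to be populated in every data file
        .commit();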
diff --git a/api/src/main/java/org/apache/iceberg/catalog/Catalog.java b/api/src/main/java/org/apache/iceberg/catalog/Catalog.java
index dfe2c30..46476bf 100644
--- a/api/src/main/java/org/apache/iceberg/catalog/Catalog.java
+++ b/api/src/main/java/org/apache/iceberg/catalog/Catalog.java
@@ -19,12 +19,15 @@
package org.apache.iceberg.catalog;
+import java.util.List;
import java.util.Map;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
+import org.apache.iceberg.Transaction;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.iceberg.exceptions.NotFoundException;
/**
* A Catalog API for table create, drop, and load operations.
@@ -32,6 +35,15 @@ import org.apache.iceberg.exceptions.NoSuchTableException;
public interface Catalog {
/**
+ * Return all the identifiers under this namespace.
+ *
+ * @param namespace a namespace
+ * @return a list of identifiers for tables
+ * @throws NotFoundException if the namespace is not found
+ */
+ List<TableIdentifier> listTables(Namespace namespace);
+
+ /**
* Create a table.
*
* @param identifier a table identifier
@@ -98,6 +110,146 @@ public interface Catalog {
}
/**
+ * Start a transaction to create a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param spec a partition spec
+ * @param location a location for the table; leave null if unspecified
+ * @param properties a string map of table properties
+ * @return a {@link Transaction} to create the table
+ * @throws AlreadyExistsException if the table already exists
+ */
+ Transaction newCreateTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ String location,
+ Map<String, String> properties);
+
+ /**
+ * Start a transaction to create a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param spec a partition spec
+ * @param properties a string map of table properties
+ * @return a {@link Transaction} to create the table
+ * @throws AlreadyExistsException if the table already exists
+ */
+ default Transaction newCreateTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ Map<String, String> properties) {
+ return newCreateTableTransaction(identifier, schema, spec, null, properties);
+ }
+
+ /**
+ * Start a transaction to create a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param spec a partition spec
+ * @return a {@link Transaction} to create the table
+ * @throws AlreadyExistsException if the table already exists
+ */
+ default Transaction newCreateTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec) {
+ return newCreateTableTransaction(identifier, schema, spec, null, null);
+ }
+
+ /**
+ * Start a transaction to create a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @return a {@link Transaction} to create the table
+ * @throws AlreadyExistsException if the table already exists
+ */
+ default Transaction newCreateTableTransaction(
+ TableIdentifier identifier,
+ Schema schema) {
+ return newCreateTableTransaction(identifier, schema, PartitionSpec.unpartitioned(), null, null);
+ }
+
+ /**
+ * Start a transaction to replace a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param spec a partition spec
+ * @param location a location for the table; leave null if unspecified
+ * @param properties a string map of table properties
+ * @param orCreate whether to create the table if it does not exist
+ * @return a {@link Transaction} to replace the table
+ * @throws NoSuchTableException if the table doesn't exist and orCreate is false
+ */
+ Transaction newReplaceTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ String location,
+ Map<String, String> properties,
+ boolean orCreate);
+
+ /**
+ * Start a transaction to replace a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param spec a partition spec
+ * @param properties a string map of table properties
+ * @param orCreate whether to create the table if it does not exist
+ * @return a {@link Transaction} to replace the table
+ * @throws NoSuchTableException if the table doesn't exist and orCreate is false
+ */
+ default Transaction newReplaceTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ Map<String, String> properties,
+ boolean orCreate) {
+ return newReplaceTableTransaction(identifier, schema, spec, null, properties, orCreate);
+ }
+
+ /**
+ * Start a transaction to replace a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param spec a partition spec
+ * @param orCreate whether to create the table if it does not exist
+ * @return a {@link Transaction} to replace the table
+ * @throws NoSuchTableException if the table doesn't exist and orCreate is false
+ */
+ default Transaction newReplaceTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ boolean orCreate) {
+ return newReplaceTableTransaction(identifier, schema, spec, null, null, orCreate);
+ }
+
+ /**
+ * Start a transaction to replace a table.
+ *
+ * @param identifier a table identifier
+ * @param schema a schema
+ * @param orCreate whether to create the table if it does not exist
+ * @return a {@link Transaction} to replace the table
+ * @throws NoSuchTableException if the table doesn't exist and orCreate is false
+ */
+ default Transaction newReplaceTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ boolean orCreate) {
+ return newReplaceTableTransaction(identifier, schema, PartitionSpec.unpartitioned(), null, null, orCreate);
+ }
+
+ /**
* Check whether table exists.
*
* @param identifier a table identifier
@@ -113,19 +265,33 @@ public interface Catalog {
}
/**
- * Drop a table.
+ * Drop a table and delete all data and metadata files.
*
* @param identifier a table identifier
* @return true if the table was dropped, false if the table did not exist
*/
- boolean dropTable(TableIdentifier identifier);
+ default boolean dropTable(TableIdentifier identifier) {
+ return dropTable(identifier, true /* drop data and metadata files */);
+ }
+
+ /**
+ * Drop a table; optionally delete data and metadata files.
+ * <p>
+ * If purge is set to true, the implementation should delete all data and metadata files.
+ *
+ * @param identifier a table identifier
+ * @param purge if true, delete all data and metadata files in the table
+ * @return true if the table was dropped, false if the table did not exist
+ */
+ boolean dropTable(TableIdentifier identifier, boolean purge);
/**
* Rename a table.
*
* @param from identifier of the table to rename
* @param to new table name
- * @throws NoSuchTableException if the table does not exist
+ * @throws NoSuchTableException if the from table does not exist
+ * @throws AlreadyExistsException if the to table already exists
*/
void renameTable(TableIdentifier from, TableIdentifier to);
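A sketch of the create-table transaction flow (the catalog instance, SCHEMA, SPEC, and the data file are assumed):
    TableIdentifier id = TableIdentifier.of("db", "events");
    Transaction txn = catalog.newCreateTableTransaction(id, SCHEMA, SPEC);
    txn.newAppend()
        .appendFile(dataFile)   // staged in the transaction
        .commit();
    txn.commitTransaction();    // creates the table and the first snapshot atomically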
diff --git a/api/src/main/java/org/apache/iceberg/catalog/Namespace.java b/api/src/main/java/org/apache/iceberg/catalog/Namespace.java
index 739ede3..2202d5f 100644
--- a/api/src/main/java/org/apache/iceberg/catalog/Namespace.java
+++ b/api/src/main/java/org/apache/iceberg/catalog/Namespace.java
@@ -20,6 +20,7 @@
package org.apache.iceberg.catalog;
import com.google.common.base.Joiner;
+import java.util.Arrays;
/**
* A namespace in a {@link Catalog}.
@@ -59,6 +60,25 @@ public class Namespace {
}
@Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+
+ if (other == null || getClass() != other.getClass()) {
+ return false;
+ }
+
+ Namespace namespace = (Namespace) other;
+ return Arrays.equals(levels, namespace.levels);
+ }
+
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(levels);
+ }
+
+ @Override
public String toString() {
return DOT.join(levels);
}
diff --git a/api/src/main/java/org/apache/iceberg/catalog/TableIdentifier.java b/api/src/main/java/org/apache/iceberg/catalog/TableIdentifier.java
index cede38f..0e4d898 100644
--- a/api/src/main/java/org/apache/iceberg/catalog/TableIdentifier.java
+++ b/api/src/main/java/org/apache/iceberg/catalog/TableIdentifier.java
@@ -23,6 +23,7 @@ import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import java.util.Arrays;
+import java.util.Objects;
/**
* Identifies a table in iceberg catalog.
@@ -76,6 +77,26 @@ public class TableIdentifier {
return name;
}
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+
+ if (other == null || getClass() != other.getClass()) {
+ return false;
+ }
+
+ TableIdentifier that = (TableIdentifier) other;
+ return namespace.equals(that.namespace) && name.equals(that.name);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(namespace, name);
+ }
+
+ @Override
public String toString() {
return namespace.toString() + "." + name;
}
diff --git a/api/src/main/java/org/apache/iceberg/exceptions/NotFoundException.java b/api/src/main/java/org/apache/iceberg/exceptions/NotFoundException.java
new file mode 100644
index 0000000..a2d05dd
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/exceptions/NotFoundException.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.exceptions;
+
+/**
+ * Exception raised when attempting to read a file that does not exist.
+ */
+public class NotFoundException extends RuntimeException {
+ public NotFoundException(String message, Object... args) {
+ super(String.format(message, args));
+ }
+
+ public NotFoundException(Throwable cause, String message, Object... args) {
+ super(String.format(message, args), cause);
+ }
+}
diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java
new file mode 100644
index 0000000..0bf2d6f
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.expressions;
+
+import com.google.common.base.Preconditions;
+import java.util.Comparator;
+
+public class BoundLiteralPredicate<T> extends BoundPredicate<T> {
+ private final Literal<T> literal;
+
+ BoundLiteralPredicate(Operation op, BoundReference<T> ref, Literal<T> lit) {
+ super(op, ref);
+ Preconditions.checkArgument(op != Operation.IN && op != Operation.NOT_IN,
+ "Bound literal predicate does not support operation: %s", op);
+ this.literal = lit;
+ }
+
+ public Literal<T> literal() {
+ return literal;
+ }
+
+ @Override
+ public boolean isLiteralPredicate() {
+ return true;
+ }
+
+ @Override
+ public BoundLiteralPredicate<T> asLiteralPredicate() {
+ return this;
+ }
+
+ @Override
+ public boolean test(T value) {
+ Comparator<T> cmp = literal.comparator();
+ switch (op()) {
+ case LT:
+ return cmp.compare(value, literal.value()) < 0;
+ case LT_EQ:
+ return cmp.compare(value, literal.value()) <= 0;
+ case GT:
+ return cmp.compare(value, literal.value()) > 0;
+ case GT_EQ:
+ return cmp.compare(value, literal.value()) >= 0;
+ case EQ:
+ return cmp.compare(value, literal.value()) == 0;
+ case NOT_EQ:
+ return cmp.compare(value, literal.value()) != 0;
+ case STARTS_WITH:
+ return String.valueOf(value).startsWith((String) literal.value());
+ default:
+ throw new IllegalStateException("Invalid operation for BoundLiteralPredicate: " + op());
+ }
+ }
+
+ @Override
+ public String toString() {
+ switch (op()) {
+ case LT:
+ return ref() + " < " + literal;
+ case LT_EQ:
+ return ref() + " <= " + literal;
+ case GT:
+ return ref() + " > " + literal;
+ case GT_EQ:
+ return ref() + " >= " + literal;
+ case EQ:
+ return ref() + " == " + literal;
+ case NOT_EQ:
+ return ref() + " != " + literal;
+ case STARTS_WITH:
+ return ref() + " startsWith \"" + literal + "\"";
+ case IN:
+ return ref() + " in { " + literal + " }";
+ case NOT_IN:
+ return ref() + " not in { " + literal + " }";
+ default:
+ return "Invalid literal predicate: operation = " + op();
+ }
+ }
+}
diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/BoundPredicate.java
index bf65eda..a171bdd 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/BoundPredicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundPredicate.java
@@ -19,17 +19,40 @@
package org.apache.iceberg.expressions;
-public class BoundPredicate<T> extends Predicate<T, BoundReference<T>> {
- BoundPredicate(Operation op, BoundReference<T> ref, Literal<T> lit) {
- super(op, ref, lit);
+import org.apache.iceberg.StructLike;
+
+public abstract class BoundPredicate<T> extends Predicate<BoundReference<T>> {
+ protected BoundPredicate(Operation op, BoundReference<T> ref) {
+ super(op, ref);
+ }
+
+ public boolean test(StructLike struct) {
+ return test(ref().get(struct));
+ }
+
+ public abstract boolean test(T value);
+
+ public boolean isUnaryPredicate() {
+ return false;
+ }
+
+ public BoundUnaryPredicate<T> asUnaryPredicate() {
+ throw new IllegalStateException("Not a unary predicate: " + this);
+ }
+
+ public boolean isLiteralPredicate() {
+ return false;
+ }
+
+ public BoundLiteralPredicate<T> asLiteralPredicate() {
+ throw new IllegalStateException("Not a literal predicate: " + this);
}
- BoundPredicate(Operation op, BoundReference<T> ref) {
- super(op, ref, null);
+ public boolean isSetPredicate() {
+ return false;
}
- @Override
- public Expression negate() {
- return new BoundPredicate<>(op().negate(), ref(), literal());
+ public BoundSetPredicate<T> asSetPredicate() {
+ throw new IllegalStateException("Not a set predicate: " + this);
}
}
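The is*/as* methods above replace instanceof checks when handling bound predicates; a dispatch sketch:
    static <T> String describe(BoundPredicate<T> pred) {
      if (pred.isLiteralPredicate()) {
        return pred.ref() + " compared to " + pred.asLiteralPredicate().literal();
      } else if (pred.isSetPredicate()) {
        return pred.ref() + " tested against " + pred.asSetPredicate().literalSet();
      } else if (pred.isUnaryPredicate()) {
        return pred.ref() + " null check: " + pred.op();
      }
      return "unknown predicate: " + pred;
    }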
diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundSetPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/BoundSetPredicate.java
new file mode 100644
index 0000000..5e6eb15
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundSetPredicate.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.expressions;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import java.util.Set;
+
+public class BoundSetPredicate<T> extends BoundPredicate<T> {
+ private static final Joiner COMMA = Joiner.on(", ");
+ private final Set<T> literalSet;
+
+ BoundSetPredicate(Operation op, BoundReference<T> ref, Set<T> lits) {
+ super(op, ref);
+ Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN,
+ "%s predicate does not support a literal set", op);
+ this.literalSet = lits;
+ }
+
+ @Override
+ public Expression negate() {
+ return new BoundSetPredicate<>(op().negate(), ref(), literalSet);
+ }
+
+ @Override
+ public boolean isSetPredicate() {
+ return true;
+ }
+
+ @Override
+ public BoundSetPredicate<T> asSetPredicate() {
+ return this;
+ }
+
+ public Set<T> literalSet() {
+ return literalSet;
+ }
+
+ @Override
+ public boolean test(T value) {
+ switch (op()) {
+ case IN:
+ return literalSet.contains(value);
+ case NOT_IN:
+ return !literalSet.contains(value);
+ default:
+ throw new IllegalStateException("Invalid operation for BoundSetPredicate: " + op());
+ }
+ }
+
+ @Override
+ public String toString() {
+ switch (op()) {
+ case IN:
+ return ref() + " in (" + COMMA.join(literalSet) + ")";
+ case NOT_IN:
+ return ref() + " not in (" + COMMA.join(literalSet) + ")";
+ default:
+ return "Invalid unary predicate: operation = " + op();
+ }
+ }
+}
diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundUnaryPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/BoundUnaryPredicate.java
new file mode 100644
index 0000000..1e5b09b
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundUnaryPredicate.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.expressions;
+
+public class BoundUnaryPredicate<T> extends BoundPredicate<T> {
+ BoundUnaryPredicate(Operation op, BoundReference<T> ref) {
+ super(op, ref);
+ }
+
+ @Override
+ public boolean isUnaryPredicate() {
+ return true;
+ }
+
+ @Override
+ public BoundUnaryPredicate<T> asUnaryPredicate() {
+ return this;
+ }
+
+ @Override
+ public boolean test(T value) {
+ switch (op()) {
+ case IS_NULL:
+ return value == null;
+ case NOT_NULL:
+ return value != null;
+ default:
+ throw new IllegalStateException("Invalid operation for BoundUnaryPredicate: " + op());
+ }
+ }
+
+ @Override
+ public String toString() {
+ switch (op()) {
+ case IS_NULL:
+ return "is_null(" + ref() + ")";
+ case NOT_NULL:
+ return "not_null(" + ref() + ")";
+ default:
+ return "Invalid unary predicate: operation = " + op();
+ }
+ }
+}
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
index e96a528..a12b648 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
@@ -21,12 +21,13 @@ package org.apache.iceberg.expressions;
import java.io.Serializable;
import java.util.Comparator;
+import java.util.Set;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
-import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.StructType;
/**
- * Evaluates an {@link Expression} for data described by a {@link Types.StructType}.
+ * Evaluates an {@link Expression} for data described by a {@link StructType}.
* <p>
* Data rows must implement {@link StructLike} and are passed to {@link #eval(StructLike)}.
* <p>
@@ -43,11 +44,11 @@ public class Evaluator implements Serializable {
return visitors.get();
}
- public Evaluator(Types.StructType struct, Expression unbound) {
+ public Evaluator(StructType struct, Expression unbound) {
this.expr = Binder.bind(struct, unbound, true);
}
- public Evaluator(Types.StructType struct, Expression unbound, boolean caseSensitive) {
+ public Evaluator(StructType struct, Expression unbound, boolean caseSensitive) {
this.expr = Binder.bind(struct, unbound, caseSensitive);
}
@@ -60,7 +61,7 @@ public class Evaluator implements Serializable {
private boolean eval(StructLike row) {
this.struct = row;
- return ExpressionVisitors.visit(expr, this);
+ return ExpressionVisitors.visitEvaluator(expr, this);
}
@Override
@@ -134,13 +135,18 @@ public class Evaluator implements Serializable {
}
@Override
- public <T> Boolean in(BoundReference<T> ref, Literal<T> lit) {
- throw new UnsupportedOperationException("In is not supported yet");
+ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+ return literalSet.contains(ref.get(struct));
}
@Override
- public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
- return !in(ref, lit);
+ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+ return !in(ref, literalSet);
+ }
+
+ @Override
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+ return ((String) ref.get(struct)).startsWith((String) lit.value());
}
}
}
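With set predicates wired in, an Evaluator can now test rows against in(); a sketch (the StructLike `row` is assumed):
    StructType struct = StructType.of(NestedField.required(1, "id", Types.LongType.get()));
    Evaluator evaluator = new Evaluator(struct, Expressions.in("id", 5L, 7L, 11L));
    boolean matches = evaluator.eval(row);   // true when the row's id is 5, 7, or 11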
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expression.java b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
index 124e612..20b1abc 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expression.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
@@ -40,7 +40,8 @@ public interface Expression extends Serializable {
NOT_IN,
NOT,
AND,
- OR;
+ OR,
+ STARTS_WITH;
/**
* @return the operation used when this is negated
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
index cc45531..d57854d 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
@@ -19,6 +19,8 @@
package org.apache.iceberg.expressions;
+import java.util.Set;
+
/**
* Utils for traversing {@link Expression expressions}.
*/
@@ -89,41 +91,63 @@ public class ExpressionVisitors {
return null;
}
- public <T> R in(BoundReference<T> ref, Literal<T> lit) {
- return null;
+ public <T> R in(BoundReference<T> ref, Set<T> literalSet) {
+ throw new UnsupportedOperationException("In operation is not supported by the visitor");
}
- public <T> R notIn(BoundReference<T> ref, Literal<T> lit) {
- return null;
+ public <T> R notIn(BoundReference<T> ref, Set<T> literalSet) {
+ throw new UnsupportedOperationException("notIn operation is not supported by the visitor");
+ }
+
+ public <T> R startsWith(BoundReference<T> ref, Literal<T> lit) {
+ throw new UnsupportedOperationException("Unsupported operation.");
}
@Override
public <T> R predicate(BoundPredicate<T> pred) {
- switch (pred.op()) {
- case IS_NULL:
- return isNull(pred.ref());
- case NOT_NULL:
- return notNull(pred.ref());
- case LT:
- return lt(pred.ref(), pred.literal());
- case LT_EQ:
- return ltEq(pred.ref(), pred.literal());
- case GT:
- return gt(pred.ref(), pred.literal());
- case GT_EQ:
- return gtEq(pred.ref(), pred.literal());
- case EQ:
- return eq(pred.ref(), pred.literal());
- case NOT_EQ:
- return notEq(pred.ref(), pred.literal());
- case IN:
- return in(pred.ref(), pred.literal());
- case NOT_IN:
- return notIn(pred.ref(), pred.literal());
- default:
- throw new UnsupportedOperationException(
- "Unknown operation for predicate: " + pred.op());
+ if (pred.isLiteralPredicate()) {
+ BoundLiteralPredicate<T> literalPred = pred.asLiteralPredicate();
+ switch (pred.op()) {
+ case LT:
+ return lt(pred.ref(), literalPred.literal());
+ case LT_EQ:
+ return ltEq(pred.ref(), literalPred.literal());
+ case GT:
+ return gt(pred.ref(), literalPred.literal());
+ case GT_EQ:
+ return gtEq(pred.ref(), literalPred.literal());
+ case EQ:
+ return eq(pred.ref(), literalPred.literal());
+ case NOT_EQ:
+ return notEq(pred.ref(), literalPred.literal());
+ case STARTS_WITH:
+ return startsWith(pred.ref(), literalPred.literal());
+ default:
+ throw new IllegalStateException("Invalid operation for BoundLiteralPredicate: " + pred.op());
+ }
+
+ } else if (pred.isUnaryPredicate()) {
+ switch (pred.op()) {
+ case IS_NULL:
+ return isNull(pred.ref());
+ case NOT_NULL:
+ return notNull(pred.ref());
+ default:
+ throw new IllegalStateException("Invalid operation for BoundUnaryPredicate: " + pred.op());
+ }
+
+ } else if (pred.isSetPredicate()) {
+ switch (pred.op()) {
+ case IN:
+ return in(pred.ref(), pred.asSetPredicate().literalSet());
+ case NOT_IN:
+ return notIn(pred.ref(), pred.asSetPredicate().literalSet());
+ default:
+ throw new IllegalStateException("Invalid operation for BoundSetPredicate: " + pred.op());
+ }
}
+
+ throw new IllegalStateException("Unsupported bound predicate: " + pred.getClass().getName());
}
@Override
@@ -143,7 +167,6 @@ public class ExpressionVisitors {
* @param <R> the return type produced by the expression visitor
* @return the value returned by the visitor for the root expression node
*/
- @SuppressWarnings("unchecked")
public static <R> R visit(Expression expr, ExpressionVisitor<R> visitor) {
if (expr instanceof Predicate) {
if (expr instanceof BoundPredicate) {
@@ -172,4 +195,52 @@ public class ExpressionVisitors {
}
}
}
+
+ /**
+ * Traverses the given {@link Expression expression} with an {@link ExpressionVisitor visitor},
+ * short-circuiting {@code AND} and {@code OR} nodes.
+ * <p>
+ * The visitor is called to handle only the nodes required to determine the result,
+ * traversing the expression tree in postfix order. Result values produced by child nodes
+ * are passed when parent nodes are handled.
+ *
+ * @param expr an expression to traverse
+ * @param visitor a visitor that will be called to handle each node in the expression tree
+ * @return the value returned by the visitor for the root expression node
+ */
+ public static Boolean visitEvaluator(Expression expr, ExpressionVisitor<Boolean> visitor) {
+ if (expr instanceof Predicate) {
+ if (expr instanceof BoundPredicate) {
+ return visitor.predicate((BoundPredicate<?>) expr);
+ } else {
+ return visitor.predicate((UnboundPredicate<?>) expr);
+ }
+ } else {
+ switch (expr.op()) {
+ case TRUE:
+ return visitor.alwaysTrue();
+ case FALSE:
+ return visitor.alwaysFalse();
+ case NOT:
+ Not not = (Not) expr;
+ return visitor.not(visitEvaluator(not.child(), visitor));
+ case AND:
+ And and = (And) expr;
+ Boolean andLeftOperand = visitEvaluator(and.left(), visitor);
+ if (!andLeftOperand) {
+ return visitor.alwaysFalse();
+ }
+ return visitor.and(Boolean.TRUE, visitEvaluator(and.right(), visitor));
+ case OR:
+ Or or = (Or) expr;
+ Boolean orLeftOperand = visitEvaluator(or.left(), visitor);
+ if (orLeftOperand) {
+ return visitor.alwaysTrue();
+ }
+ return visitor.or(Boolean.FALSE, visitEvaluator(or.right(), visitor));
+ default:
+ throw new UnsupportedOperationException(
+ "Unknown operation: " + expr.op());
+ }
+ }
+ }
}
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
index d280f89..d62974b 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
@@ -20,6 +20,7 @@
package org.apache.iceberg.expressions;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
import java.util.stream.Stream;
import org.apache.iceberg.expressions.Expression.Operation;
@@ -105,10 +106,30 @@ public class Expressions {
return new UnboundPredicate<>(Expression.Operation.NOT_EQ, ref(name), value);
}
+ public static UnboundPredicate<String> startsWith(String name, String value) {
+ return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value);
+ }
+
+ public static <T> UnboundPredicate<T> in(String name, T... values) {
+ return predicate(Operation.IN, name, Lists.newArrayList(values));
+ }
+
+ public static <T> UnboundPredicate<T> in(String name, Iterable<T> values) {
+ Preconditions.checkNotNull(values, "Values cannot be null for IN predicate.");
+ return predicate(Operation.IN, name, values);
+ }
+
+ public static <T> UnboundPredicate<T> notIn(String name, T... values) {
+ return predicate(Operation.NOT_IN, name, Lists.newArrayList(values));
+ }
+
+ public static <T> UnboundPredicate<T> notIn(String name, Iterable<T> values) {
+ Preconditions.checkNotNull(values, "Values cannot be null for NOT_IN predicate.");
+ return predicate(Operation.NOT_IN, name, values);
+ }
+
public static <T> UnboundPredicate<T> predicate(Operation op, String name, T value) {
- Preconditions.checkArgument(op != Operation.IS_NULL && op != Operation.NOT_NULL,
- "Cannot create %s predicate inclusive a value", op);
- return new UnboundPredicate<>(op, ref(name), value);
+ return predicate(op, name, Literals.from(value));
}
public static <T> UnboundPredicate<T> predicate(Operation op, String name, Literal<T> lit) {
@@ -117,6 +138,10 @@ public class Expressions {
return new UnboundPredicate<>(op, ref(name), lit);
}
+ public static <T> UnboundPredicate<T> predicate(Operation op, String name, Iterable<T> values) {
+ return new UnboundPredicate<>(op, ref(name), values);
+ }
+
public static <T> UnboundPredicate<T> predicate(Operation op, String name) {
Preconditions.checkArgument(op == Operation.IS_NULL || op == Operation.NOT_NULL,
"Cannot create %s predicate without a value", op);
diff --git a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
index 64d34e9..6da73ac 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
@@ -20,12 +20,16 @@
package org.apache.iceberg.expressions;
import java.nio.ByteBuffer;
+import java.util.Comparator;
import java.util.Map;
+import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
+import org.apache.iceberg.types.Comparators;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types.StructType;
+import org.apache.iceberg.util.BinaryUtil;
import static org.apache.iceberg.expressions.Expressions.rewriteNot;
@@ -52,7 +56,7 @@ public class InclusiveMetricsEvaluator {
return visitors.get();
}
- InclusiveMetricsEvaluator(Schema schema, Expression unbound) {
+ public InclusiveMetricsEvaluator(Schema schema, Expression unbound) {
this(schema, unbound, true);
}
@@ -93,7 +97,7 @@ public class InclusiveMetricsEvaluator {
this.lowerBounds = file.lowerBounds();
this.upperBounds = file.upperBounds();
- return ExpressionVisitors.visit(expr, this);
+ return ExpressionVisitors.visitEvaluator(expr, this);
}
@Override
@@ -140,9 +144,7 @@ public class InclusiveMetricsEvaluator {
// if the column has no non-null values, the expression cannot match
Integer id = ref.fieldId();
- if (valueCounts != null && valueCounts.containsKey(id) &&
- nullCounts != null && nullCounts.containsKey(id) &&
- valueCounts.get(id) - nullCounts.get(id) == 0) {
+ if (containsNullsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
@@ -153,6 +155,10 @@ public class InclusiveMetricsEvaluator {
public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
Integer id = ref.fieldId();
+ if (containsNullsOnly(id)) {
+ return ROWS_CANNOT_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
@@ -169,6 +175,10 @@ public class InclusiveMetricsEvaluator {
public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
Integer id = ref.fieldId();
+ if (containsNullsOnly(id)) {
+ return ROWS_CANNOT_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
@@ -185,6 +195,10 @@ public class InclusiveMetricsEvaluator {
public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
Integer id = ref.fieldId();
+ if (containsNullsOnly(id)) {
+ return ROWS_CANNOT_MATCH;
+ }
+
if (upperBounds != null && upperBounds.containsKey(id)) {
T upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
@@ -201,6 +215,10 @@ public class InclusiveMetricsEvaluator {
public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
Integer id = ref.fieldId();
+ if (containsNullsOnly(id)) {
+ return ROWS_CANNOT_MATCH;
+ }
+
if (upperBounds != null && upperBounds.containsKey(id)) {
T upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
@@ -217,6 +235,10 @@ public class InclusiveMetricsEvaluator {
public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
Integer id = ref.fieldId();
+ if (containsNullsOnly(id)) {
+ return ROWS_CANNOT_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
@@ -246,13 +268,54 @@ public class InclusiveMetricsEvaluator {
}
@Override
- public <T> Boolean in(BoundReference<T> ref, Literal<T> lit) {
+ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
return ROWS_MIGHT_MATCH;
}
@Override
- public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
+ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+ Integer id = ref.fieldId();
+
+ if (containsNullsOnly(id)) {
+ return ROWS_CANNOT_MATCH;
+ }
+
+ ByteBuffer prefixAsBytes = lit.toByteBuffer();
+
+ Comparator<ByteBuffer> comparator = Comparators.unsignedBytes();
+
+ if (lowerBounds != null && lowerBounds.containsKey(id)) {
+ ByteBuffer lower = lowerBounds.get(id);
+ // truncate lower bound so that its length in bytes is not greater than the length of prefix
+ int length = Math.min(prefixAsBytes.remaining(), lower.remaining());
+ int cmp = comparator.compare(BinaryUtil.truncateBinary(lower, length), prefixAsBytes);
+ if (cmp > 0) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+
+ if (upperBounds != null && upperBounds.containsKey(id)) {
+ ByteBuffer upper = upperBounds.get(id);
+ // truncate upper bound so that its length in bytes is not greater than the length of prefix
+ int length = Math.min(prefixAsBytes.remaining(), upper.remaining());
+ int cmp = comparator.compare(BinaryUtil.truncateBinary(upper, length), prefixAsBytes);
+ if (cmp < 0) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+
return ROWS_MIGHT_MATCH;
}
+
+ private boolean containsNullsOnly(Integer id) {
+ return valueCounts != null && valueCounts.containsKey(id) &&
+ nullCounts != null && nullCounts.containsKey(id) &&
+ valueCounts.get(id) - nullCounts.get(id) == 0;
+ }
}
}
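The startsWith check above prunes a file only when its truncated bounds prove no value can carry the prefix. A standalone illustration of the lower-bound comparison, using only the calls that appear in this hunk (the class name and sample strings are invented):

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import org.apache.iceberg.types.Comparators;
    import org.apache.iceberg.util.BinaryUtil;

    public class PrefixLowerBound {
      public static void main(String[] args) {
        ByteBuffer prefix = StandardCharsets.UTF_8.encode("ice");
        ByteBuffer lower = StandardCharsets.UTF_8.encode("iceberg"); // column lower bound
        int len = Math.min(prefix.remaining(), lower.remaining());
        int cmp = Comparators.unsignedBytes().compare(BinaryUtil.truncateBinary(lower, len), prefix);
        // cmp == 0: the truncated lower bound equals the prefix, so rows might match;
        // only cmp > 0 would prove that no value in the file can start with "ice"
        System.out.println(cmp);
      }
    }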
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literal.java b/api/src/main/java/org/apache/iceberg/expressions/Literal.java
index b4642e3..4c9935f 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Literal.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Literal.java
@@ -102,4 +102,14 @@ public interface Literal<T> extends Serializable {
* @return a comparator for T objects
*/
Comparator<T> comparator();
+
+ /**
+ * Serializes the value wrapped by this literal to binary using the single-value serialization
+ * format described in the Iceberg table specification.
+ *
+ * @return a ByteBuffer that contains the serialized literal value.
+ */
+ default ByteBuffer toByteBuffer() {
+ throw new UnsupportedOperationException("toByteBuffer is not supported");
+ }
}
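The new default method gives every literal a spec-compliant binary form. A one-line sketch, assuming the existing Literal.of factory (the wrapper class is illustrative):

    import java.nio.ByteBuffer;
    import org.apache.iceberg.expressions.Literal;

    public class LiteralBytes {
      public static void main(String[] args) {
        ByteBuffer encoded = Literal.of(34L).toByteBuffer();
        System.out.println(encoded.remaining()); // 8: a long serializes to 8 bytes in the single-value format
      }
    }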
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java
index 3216e0e..31dfbac 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java
@@ -33,8 +33,10 @@ import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.Comparator;
+import java.util.Objects;
import java.util.UUID;
import org.apache.iceberg.types.Comparators;
+import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
@@ -94,6 +96,7 @@ class Literals {
private abstract static class BaseLiteral<T> implements Literal<T> {
private final T value;
+ private transient volatile ByteBuffer byteBuffer = null;
BaseLiteral(T value) {
Preconditions.checkNotNull(value, "Literal values cannot be null");
@@ -106,9 +109,43 @@ class Literals {
}
@Override
+ public final ByteBuffer toByteBuffer() {
+ if (byteBuffer == null) {
+ synchronized (this) {
+ if (byteBuffer == null) {
+ byteBuffer = Conversions.toByteBuffer(typeId(), value());
+ }
+ }
+ }
+ return byteBuffer;
+ }
+
+ protected abstract Type.TypeID typeId();
+
+ @Override
public String toString() {
return String.valueOf(value);
}
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (other == null || getClass() != other.getClass()) {
+ return false;
+ }
+ BaseLiteral<T> that = (BaseLiteral<T>) other;
+
+ return comparator().compare(value(), that.value()) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hashCode(value);
+ }
+
}
private abstract static class ComparableLiteral<C extends Comparable<C>> extends BaseLiteral<C> {
@@ -194,6 +231,11 @@ class Literals {
}
return null;
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.BOOLEAN;
+ }
}
static class IntegerLiteral extends ComparableLiteral<Integer> {
@@ -224,6 +266,11 @@ class Literals {
return null;
}
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.INTEGER;
+ }
}
static class LongLiteral extends ComparableLiteral<Long> {
@@ -261,6 +308,11 @@ class Literals {
return null;
}
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.LONG;
+ }
}
static class FloatLiteral extends ComparableLiteral<Float> {
@@ -284,6 +336,11 @@ class Literals {
return null;
}
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.FLOAT;
+ }
}
static class DoubleLiteral extends ComparableLiteral<Double> {
@@ -314,6 +371,11 @@ class Literals {
return null;
}
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.DOUBLE;
+ }
}
static class DateLiteral extends ComparableLiteral<Integer> {
@@ -329,6 +391,11 @@ class Literals {
}
return null;
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.DATE;
+ }
}
static class TimeLiteral extends ComparableLiteral<Long> {
@@ -344,6 +411,11 @@ class Literals {
}
return null;
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.TIME;
+ }
}
static class TimestampLiteral extends ComparableLiteral<Long> {
@@ -364,6 +436,11 @@ class Literals {
}
return null;
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.TIMESTAMP;
+ }
}
static class DecimalLiteral extends ComparableLiteral<BigDecimal> {
@@ -385,6 +462,11 @@ class Literals {
return null;
}
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.DECIMAL;
+ }
}
static class StringLiteral extends BaseLiteral<CharSequence> {
@@ -446,6 +528,11 @@ class Literals {
}
@Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.STRING;
+ }
+
+ @Override
public String toString() {
return "\"" + value() + "\"";
}
@@ -464,6 +551,11 @@ class Literals {
}
return null;
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.UUID;
+ }
}
static class FixedLiteral extends BaseLiteral<ByteBuffer> {
@@ -496,6 +588,11 @@ class Literals {
return CMP;
}
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.FIXED;
+ }
+
Object writeReplace() throws ObjectStreamException {
return new SerializationProxies.FixedLiteralProxy(value());
}
@@ -534,5 +631,10 @@ class Literals {
Object writeReplace() throws ObjectStreamException {
return new SerializationProxies.BinaryLiteralProxy(value());
}
+
+ @Override
+ protected Type.TypeID typeId() {
+ return Type.TypeID.BINARY;
+ }
}
}
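BaseLiteral caches the serialized form with double-checked locking on a transient volatile field, so the buffer is computed at most once per literal (and recomputed after deserialization, since the cache is transient). A sketch of the observable behavior; names and values are illustrative:

    import java.nio.ByteBuffer;
    import org.apache.iceberg.expressions.Literal;

    public class CachedEncoding {
      public static void main(String[] args) {
        Literal<Long> lit = Literal.of(34L);
        ByteBuffer first = lit.toByteBuffer();  // computed via Conversions.toByteBuffer and cached
        ByteBuffer second = lit.toByteBuffer(); // served from the cache
        System.out.println(first == second);    // true: the same cached instance is returned
      }
    }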
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java
index 85bac1c..9f23c03 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java
@@ -20,14 +20,18 @@
package org.apache.iceberg.expressions;
import java.nio.ByteBuffer;
+import java.util.Comparator;
import java.util.List;
+import java.util.Set;
import org.apache.iceberg.Accessors;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestFile.PartitionFieldSummary;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
+import org.apache.iceberg.types.Comparators;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types.StructType;
+import org.apache.iceberg.util.BinaryUtil;
import static org.apache.iceberg.expressions.Expressions.rewriteNot;
@@ -89,7 +93,7 @@ public class ManifestEvaluator {
return ROWS_MIGHT_MATCH;
}
- return ExpressionVisitors.visit(expr, this);
+ return ExpressionVisitors.visitEvaluator(expr, this);
}
@Override
@@ -245,12 +249,44 @@ public class ManifestEvaluator {
}
@Override
- public <T> Boolean in(BoundReference<T> ref, Literal<T> lit) {
+ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
return ROWS_MIGHT_MATCH;
}
@Override
- public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
+ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+ int pos = Accessors.toPosition(ref.accessor());
+ PartitionFieldSummary fieldStats = stats.get(pos);
+
+ if (fieldStats.lowerBound() == null) {
+ return ROWS_CANNOT_MATCH; // values are all null and literal cannot contain null
+ }
+
+ ByteBuffer prefixAsBytes = lit.toByteBuffer();
+
+ Comparator<ByteBuffer> comparator = Comparators.unsignedBytes();
+
+ ByteBuffer lower = fieldStats.lowerBound();
+ // truncate lower bound so that its length in bytes is not greater than the length of prefix
+ int lowerLength = Math.min(prefixAsBytes.remaining(), lower.remaining());
+ int lowerCmp = comparator.compare(BinaryUtil.truncateBinary(lower, lowerLength), prefixAsBytes);
+ if (lowerCmp > 0) {
+ return ROWS_CANNOT_MATCH;
+ }
+
+ ByteBuffer upper = fieldStats.upperBound();
+ // truncate upper bound so that its length in bytes is not greater than the length of prefix
+ int upperLength = Math.min(prefixAsBytes.remaining(), upper.remaining());
+ int upperCmp = comparator.compare(BinaryUtil.truncateBinary(upper, upperLength), prefixAsBytes);
+ if (upperCmp < 0) {
+ return ROWS_CANNOT_MATCH;
+ }
+
return ROWS_MIGHT_MATCH;
}
}
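The same prefix rule applies to a manifest's partition summaries: if even the truncated upper bound sorts below the prefix, no partition value can start with it. A small cannot-match illustration (class name and values invented):

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import org.apache.iceberg.types.Comparators;
    import org.apache.iceberg.util.BinaryUtil;

    public class PrefixUpperBound {
      public static void main(String[] args) {
        ByteBuffer prefix = StandardCharsets.UTF_8.encode("ice");
        ByteBuffer upper = StandardCharsets.UTF_8.encode("ic"); // partition upper bound
        int len = Math.min(prefix.remaining(), upper.remaining());
        int cmp = Comparators.unsignedBytes().compare(BinaryUtil.truncateBinary(upper, len), prefix);
        System.out.println(cmp < 0); // true: "ic" < "ice", so no value <= "ic" starts with "ice"
      }
    }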
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
index 12923ef..b169a37 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
@@ -19,15 +19,13 @@
package org.apache.iceberg.expressions;
-public abstract class Predicate<T, R extends Reference> implements Expression {
+public abstract class Predicate<R extends Reference> implements Expression {
private final Operation op;
private final R ref;
- private final Literal<T> literal;
- Predicate(Operation op, R ref, Literal<T> lit) {
+ Predicate(Operation op, R ref) {
this.op = op;
this.ref = ref;
- this.literal = lit;
}
@Override
@@ -38,36 +36,4 @@ public abstract class Predicate<T, R extends Reference> implements Expression {
public R ref() {
return ref;
}
-
- public Literal<T> literal() {
- return literal;
- }
-
- @Override
- public String toString() {
- switch (op) {
- case IS_NULL:
- return "is_null(" + ref() + ")";
- case NOT_NULL:
- return "not_null(" + ref() + ")";
- case LT:
- return String.valueOf(ref()) + " < " + literal();
- case LT_EQ:
- return String.valueOf(ref()) + " <= " + literal();
- case GT:
- return String.valueOf(ref()) + " > " + literal();
- case GT_EQ:
- return String.valueOf(ref()) + " >= " + literal();
- case EQ:
- return String.valueOf(ref()) + " == " + literal();
- case NOT_EQ:
- return String.valueOf(ref()) + " != " + literal();
-// case IN:
-// break;
-// case NOT_IN:
-// break;
- default:
- return "Invalid predicate: operation = " + op;
- }
- }
}
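The literal no longer lives on Predicate itself; each subclass now carries exactly the state its operation needs. A sketch of the resulting hierarchy, using the class names from this commit (the exact type parameters of BoundPredicate are inferred from usage elsewhere in the patch):

    Predicate<R extends Reference>       op() and ref() only
     |- UnboundPredicate<T>              holds a List<Literal<T>>; bind() produces one of:
     `- BoundPredicate<T>                (over a BoundReference<T>)
         |- BoundUnaryPredicate<T>       IS_NULL / NOT_NULL, no literal
         |- BoundLiteralPredicate<T>     one literal (LT, EQ, STARTS_WITH, ...)
         `- BoundSetPredicate<T>         a literal set (IN / NOT_IN)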
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
index dd3a0b8..64315c8 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
@@ -195,6 +195,11 @@ public class ResidualEvaluator implements Serializable {
}
@Override
+ public <T> Expression startsWith(BoundReference<T> ref, Literal<T> lit) {
+ return ((String) ref.get(struct)).startsWith((String) lit.value()) ? alwaysTrue() : alwaysFalse();
+ }
+
+ @Override
@SuppressWarnings("unchecked")
public <T> Expression predicate(BoundPredicate<T> pred) {
/**
diff --git a/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
index 6af5b21..193582a 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.expressions;
import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import java.util.Map;
+import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
@@ -90,7 +91,7 @@ public class StrictMetricsEvaluator {
this.lowerBounds = file.lowerBounds();
this.upperBounds = file.upperBounds();
- return ExpressionVisitors.visit(expr, this);
+ return ExpressionVisitors.visitEvaluator(expr, this);
}
@Override
@@ -126,9 +127,7 @@ public class StrictMetricsEvaluator {
Preconditions.checkNotNull(struct.field(id),
"Cannot filter by nested column: %s", schema.findField(id));
- if (valueCounts != null && valueCounts.containsKey(id) &&
- nullCounts != null && nullCounts.containsKey(id) &&
- valueCounts.get(id) - nullCounts.get(id) == 0) {
+ if (containsNullsOnly(id)) {
return ROWS_MUST_MATCH;
}
@@ -157,6 +156,10 @@ public class StrictMetricsEvaluator {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
+ if (canContainNulls(id)) {
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
if (upperBounds != null && upperBounds.containsKey(id)) {
T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
@@ -176,6 +179,10 @@ public class StrictMetricsEvaluator {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
+ if (canContainNulls(id)) {
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
if (upperBounds != null && upperBounds.containsKey(id)) {
T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
@@ -195,6 +202,10 @@ public class StrictMetricsEvaluator {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
+ if (canContainNulls(id)) {
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
@@ -214,6 +225,10 @@ public class StrictMetricsEvaluator {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
+ if (canContainNulls(id)) {
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
@@ -233,6 +248,10 @@ public class StrictMetricsEvaluator {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
+ if (canContainNulls(id)) {
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id) &&
upperBounds != null && upperBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id));
@@ -262,6 +281,10 @@ public class StrictMetricsEvaluator {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
+ if (containsNullsOnly(id)) {
+ return ROWS_MUST_MATCH;
+ }
+
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id));
@@ -284,13 +307,28 @@ public class StrictMetricsEvaluator {
}
@Override
- public <T> Boolean in(BoundReference<T> ref, Literal<T> lit) {
+ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
return ROWS_MIGHT_NOT_MATCH;
}
@Override
- public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
return ROWS_MIGHT_NOT_MATCH;
}
+
+ private boolean canContainNulls(Integer id) {
+ return nullCounts == null || nullCounts.containsKey(id) && nullCounts.get(id) > 0;
+ }
+
+ private boolean containsNullsOnly(Integer id) {
+ return valueCounts != null && valueCounts.containsKey(id) &&
+ nullCounts != null && nullCounts.containsKey(id) &&
+ valueCounts.get(id) - nullCounts.get(id) == 0;
+ }
}
}
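The strict evaluator can answer ROWS_MUST_MATCH only when every row, including a potential null, satisfies the predicate; that is what the new canContainNulls guard encodes. A worked trace (stats values invented):

    // strict lt(x, 10) over a file whose stats say upperBound(x) = 5:
    //   nullCount(x) == 0            -> every row has x <= 5 < 10 -> ROWS_MUST_MATCH
    //   nullCount(x) > 0             -> a null row fails x < 10   -> ROWS_MIGHT_NOT_MATCH
    //   nullCounts missing entirely  -> nulls cannot be ruled out -> ROWS_MIGHT_NOT_MATCH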
diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
index 13fdffb..008af7e 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
@@ -19,30 +19,62 @@
package org.apache.iceberg.expressions;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import java.util.List;
+import java.util.Set;
import org.apache.iceberg.Schema;
import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.StructType;
+import org.apache.iceberg.util.CharSequenceSet;
-import static org.apache.iceberg.expressions.Expression.Operation.IS_NULL;
-import static org.apache.iceberg.expressions.Expression.Operation.NOT_NULL;
+public class UnboundPredicate<T> extends Predicate<NamedReference> {
+ private static final Joiner COMMA = Joiner.on(", ");
-public class UnboundPredicate<T> extends Predicate<T, NamedReference> {
+ private final List<Literal<T>> literals;
UnboundPredicate(Operation op, NamedReference namedRef, T value) {
- super(op, namedRef, Literals.from(value));
+ this(op, namedRef, Literals.from(value));
}
UnboundPredicate(Operation op, NamedReference namedRef) {
- super(op, namedRef, null);
+ super(op, namedRef);
+ this.literals = null;
}
UnboundPredicate(Operation op, NamedReference namedRef, Literal<T> lit) {
- super(op, namedRef, lit);
+ super(op, namedRef);
+ this.literals = Lists.newArrayList(lit);
+ }
+
+ UnboundPredicate(Operation op, NamedReference namedRef, Iterable<T> values) {
+ super(op, namedRef);
+ this.literals = Lists.newArrayList(Iterables.transform(values, Literals::from));
+ }
+
+ private UnboundPredicate(Operation op, NamedReference namedRef, List<Literal<T>> literals) {
+ super(op, namedRef);
+ this.literals = literals;
}
@Override
public Expression negate() {
- return new UnboundPredicate<>(op().negate(), ref(), literal());
+ return new UnboundPredicate<>(op().negate(), ref(), literals);
+ }
+
+ public Literal<T> literal() {
+ Preconditions.checkArgument(op() != Operation.IN && op() != Operation.NOT_IN,
+ "%s predicate cannot return a literal", op());
+ return literals == null ? null : Iterables.getOnlyElement(literals);
+ }
+
+ public List<Literal<T>> literals() {
+ return literals;
}
/**
@@ -50,23 +82,23 @@ public class UnboundPredicate<T> extends Predicate<T, NamedReference> {
*
* Access modifier is package-private, to only allow use from existing tests.
*
- * @param struct The {@link Types.StructType struct type} to resolve references by name.
+ * @param struct The {@link StructType struct type} to resolve references by name.
* @return an {@link Expression}
* @throws ValidationException if literals do not match bound references, or if comparison on expression is invalid
*/
- Expression bind(Types.StructType struct) {
+ Expression bind(StructType struct) {
return bind(struct, true);
}
/**
* Bind this UnboundPredicate.
*
- * @param struct The {@link Types.StructType struct type} to resolve references by name.
+ * @param struct The {@link StructType struct type} to resolve references by name.
* @param caseSensitive A boolean flag to control whether the bind should enforce case sensitivity.
* @return an {@link Expression}
* @throws ValidationException if literals do not match bound references, or if comparison on expression is invalid
*/
- public Expression bind(Types.StructType struct, boolean caseSensitive) {
+ public Expression bind(StructType struct, boolean caseSensitive) {
Schema schema = new Schema(struct.fields());
Types.NestedField field = caseSensitive ?
schema.findField(ref().name()) :
@@ -75,30 +107,43 @@ public class UnboundPredicate<T> extends Predicate<T, NamedReference> {
ValidationException.check(field != null,
"Cannot find field '%s' in struct: %s", ref().name(), schema.asStruct());
- if (literal() == null) {
- switch (op()) {
- case IS_NULL:
- if (field.isRequired()) {
- return Expressions.alwaysFalse();
- }
- return new BoundPredicate<>(IS_NULL, new BoundReference<>(field.fieldId(),
- schema.accessorForField(field.fieldId())));
- case NOT_NULL:
- if (field.isRequired()) {
- return Expressions.alwaysTrue();
- }
- return new BoundPredicate<>(NOT_NULL, new BoundReference<>(field.fieldId(),
- schema.accessorForField(field.fieldId())));
- default:
- throw new ValidationException("Operation must be IS_NULL or NOT_NULL");
- }
+ BoundReference<T> ref = new BoundReference<>(field.fieldId(), schema.accessorForField(field.fieldId()));
+
+ if (literals == null) {
+ return bindUnaryOperation(ref, field.isRequired());
+ }
+
+ if (op() == Operation.IN || op() == Operation.NOT_IN) {
+ return bindInOperation(ref, field.type(), op());
}
- Literal<T> lit = literal().to(field.type());
+ return bindLiteralOperation(ref, field.type());
+ }
+
+ private Expression bindUnaryOperation(BoundReference<T> ref, boolean isRequired) {
+ switch (op()) {
+ case IS_NULL:
+ if (isRequired) {
+ return Expressions.alwaysFalse();
+ }
+ return new BoundUnaryPredicate<>(Operation.IS_NULL, ref);
+ case NOT_NULL:
+ if (isRequired) {
+ return Expressions.alwaysTrue();
+ }
+ return new BoundUnaryPredicate<>(Operation.NOT_NULL, ref);
+ default:
+ throw new ValidationException("Operation must be IS_NULL or NOT_NULL");
+ }
+ }
+
+ private Expression bindLiteralOperation(BoundReference<T> ref, Type type) {
+ Literal<T> lit = literal().to(type);
+
if (lit == null) {
throw new ValidationException(String.format(
- "Invalid value for comparison inclusive type %s: %s (%s)",
- field.type(), literal().value(), literal().value().getClass().getName()));
+ "Invalid value for conversion to type %s: %s (%s)",
+ type, literal().value(), literal().value().getClass().getName()));
} else if (lit == Literals.aboveMax()) {
switch (op()) {
@@ -110,10 +155,6 @@ public class UnboundPredicate<T> extends Predicate<T, NamedReference> {
case GT_EQ:
case EQ:
return Expressions.alwaysFalse();
-// case IN:
-// break;
-// case NOT_IN:
-// break;
}
} else if (lit == Literals.belowMin()) {
switch (op()) {
@@ -125,13 +166,87 @@ public class UnboundPredicate<T> extends Predicate<T, NamedReference> {
case LT_EQ:
case EQ:
return Expressions.alwaysFalse();
-// case IN:
-// break;
-// case NOT_IN:
-// break;
}
}
- return new BoundPredicate<>(op(), new BoundReference<>(field.fieldId(),
- schema.accessorForField(field.fieldId())), lit);
+
+ return new BoundLiteralPredicate<>(op(), ref, lit);
+ }
+
+ private Expression bindInOperation(BoundReference<T> ref, Type type, Operation op) {
+ List<Literal<T>> convertedLiterals = Lists.newArrayList(Iterables.filter(
+ Lists.transform(literals, lit -> {
+ Literal<T> converted = lit.to(type);
+ ValidationException.check(converted != null,
+ "Invalid value for conversion to type %s: %s (%s)", type, lit, lit.getClass().getName());
+ return converted;
+ }),
+ lit -> lit != Literals.aboveMax() && lit != Literals.belowMin()));
+
+ if (convertedLiterals.isEmpty()) {
+ switch (op) {
+ case IN:
+ return Expressions.alwaysFalse();
+ case NOT_IN:
+ return Expressions.alwaysTrue();
+ default:
+ throw new ValidationException("Operation must be IN or NOT_IN");
+ }
+ }
+
+ Set<T> literalSet = setOf(convertedLiterals);
+ if (literalSet.size() == 1) {
+ switch (op) {
+ case IN:
+ return new BoundLiteralPredicate<>(Operation.EQ, ref, Iterables.get(convertedLiterals, 0));
+ case NOT_IN:
+ return new BoundLiteralPredicate<>(Operation.NOT_EQ, ref, Iterables.get(convertedLiterals, 0));
+ default:
+ throw new ValidationException("Operation must be IN or NOT_IN");
+ }
+ }
+
+ return new BoundSetPredicate<>(op, ref, literalSet);
+ }
+
+ @Override
+ public String toString() {
+ switch (op()) {
+ case IS_NULL:
+ return "is_null(" + ref() + ")";
+ case NOT_NULL:
+ return "not_null(" + ref() + ")";
+ case LT:
+ return ref() + " < " + literal();
+ case LT_EQ:
+ return ref() + " <= " + literal();
+ case GT:
+ return ref() + " > " + literal();
+ case GT_EQ:
+ return ref() + " >= " + literal();
+ case EQ:
+ return ref() + " == " + literal();
+ case NOT_EQ:
+ return ref() + " != " + literal();
+ case STARTS_WITH:
+ return ref() + " startsWith \"" + literal() + "\"";
+ case IN:
+ return ref() + " in (" + COMMA.join(literals()) + ")";
+ case NOT_IN:
+ return ref() + " not in (" + COMMA.join(literals()) + ")";
+ default:
+ return "Invalid predicate: operation = " + op();
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ static <T> Set<T> setOf(Iterable<Literal<T>> literals) {
+ Literal<T> lit = Iterables.get(literals, 0);
+ if (lit instanceof Literals.StringLiteral && lit.value() instanceof CharSequence) {
+ Iterable<T> values = Iterables.transform(literals, Literal::value);
+ Iterable<CharSequence> charSeqs = Iterables.transform(values, val -> (CharSequence) val);
+ return (Set<T>) CharSequenceSet.of(charSeqs);
+ } else {
+ return Sets.newHashSet(Iterables.transform(literals, Literal::value));
+ }
}
}
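bindInOperation converts and de-duplicates the literal list before choosing a bound form: an empty post-conversion set folds to a constant, a single distinct value degenerates to EQ/NOT_EQ, and anything larger becomes a BoundSetPredicate. A sketch; the struct, field, and values are invented:

    import org.apache.iceberg.expressions.Expression;
    import org.apache.iceberg.expressions.Expressions;
    import org.apache.iceberg.types.Types;
    import org.apache.iceberg.types.Types.NestedField;

    public class BindInExamples {
      public static void main(String[] args) {
        Types.StructType struct = Types.StructType.of(
            NestedField.required(1, "x", Types.IntegerType.get()));
        Expression single = Expressions.in("x", 5, 5).bind(struct, true); // duplicates collapse to x == 5
        Expression set = Expressions.in("x", 5, 6).bind(struct, true);    // stays an IN over {5, 6}
        System.out.println(single.op() + " / " + set.op());               // EQ / IN
      }
    }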
diff --git a/api/src/main/java/org/apache/iceberg/io/InputFile.java b/api/src/main/java/org/apache/iceberg/io/InputFile.java
index 7d21f73..70b1e9f 100644
--- a/api/src/main/java/org/apache/iceberg/io/InputFile.java
+++ b/api/src/main/java/org/apache/iceberg/io/InputFile.java
@@ -20,6 +20,7 @@
package org.apache.iceberg.io;
import java.io.IOException;
+import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.exceptions.RuntimeIOException;
/**
@@ -38,6 +39,7 @@ public interface InputFile {
* Opens a new {@link SeekableInputStream} for the underlying data file
*
* @return a seekable stream for reading the file
+ * @throws NotFoundException If the file does not exist
* @throws RuntimeIOException If the implementation throws an {@link IOException}
*/
SeekableInputStream newStream();
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
index 9448d02..c29ffb0 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
@@ -29,7 +29,9 @@ import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Set;
import java.util.UUID;
+import org.apache.iceberg.expressions.BoundLiteralPredicate;
import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.BoundUnaryPredicate;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.types.Type;
@@ -79,6 +81,9 @@ abstract class Bucket<T> implements Transform<T, Integer> {
@Override
public Integer apply(T value) {
+ if (value == null) {
+ return null;
+ }
return (hash(value) & Integer.MAX_VALUE) % numBuckets;
}
@@ -86,8 +91,7 @@ abstract class Bucket<T> implements Transform<T, Integer> {
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof Bucket)) {
return false;
}
@@ -107,31 +111,46 @@ abstract class Bucket<T> implements Transform<T, Integer> {
@Override
public UnboundPredicate<Integer> project(String name, BoundPredicate<T> predicate) {
- switch (predicate.op()) {
- case EQ:
- return Expressions.predicate(
- predicate.op(), name, apply(predicate.literal().value()));
+ if (predicate instanceof BoundUnaryPredicate) {
+ return Expressions.predicate(predicate.op(), name);
+ } else if (predicate instanceof BoundLiteralPredicate) {
+ BoundLiteralPredicate<T> pred = predicate.asLiteralPredicate();
+ switch (pred.op()) {
+ case EQ:
+ return Expressions.predicate(
+ pred.op(), name, apply(pred.literal().value()));
// case IN:
// return Expressions.predicate();
- default:
- // comparison predicates can't be projected, notEq can't be projected
- // TODO: small ranges can be projected.
- // for example, (x > 0) and (x < 3) can be turned into in({1, 2}) and projected.
- return null;
+ case STARTS_WITH:
+ default:
+ // comparison predicates can't be projected, notEq can't be projected
+ // TODO: small ranges can be projected.
+ // for example, (x > 0) and (x < 3) can be turned into in({1, 2}) and projected.
+ return null;
+ }
}
+
+ return null;
}
@Override
public UnboundPredicate<Integer> projectStrict(String name, BoundPredicate<T> predicate) {
- switch (predicate.op()) {
- case NOT_EQ: // TODO: need to translate not(eq(...)) into notEq in expressions
- return Expressions.predicate(predicate.op(), name, apply(predicate.literal().value()));
+ if (predicate instanceof BoundUnaryPredicate) {
+ return Expressions.predicate(predicate.op(), name);
+ } else if (predicate instanceof BoundLiteralPredicate) {
+ BoundLiteralPredicate<T> pred = predicate.asLiteralPredicate();
+ switch (pred.op()) {
+ case NOT_EQ: // TODO: need to translate not(eq(...)) into notEq in expressions
+ return Expressions.predicate(pred.op(), name, apply(pred.literal().value()));
// case NOT_IN:
// return null;
- default:
- // no strict projection for comparison or equality
- return null;
+ default:
+ // no strict projection for comparison or equality
+ return null;
+ }
}
+
+ return null;
}
@Override
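Besides the subtype dispatch, apply() now passes nulls through instead of hashing them. A quick sketch using the public bucket factory (class name and values invented):

    import org.apache.iceberg.transforms.Transform;
    import org.apache.iceberg.transforms.Transforms;
    import org.apache.iceberg.types.Types;

    public class BucketNulls {
      public static void main(String[] args) {
        Transform<Integer, Integer> bucket16 = Transforms.bucket(Types.IntegerType.get(), 16);
        System.out.println(bucket16.apply(34));   // some bucket in [0, 16)
        System.out.println(bucket16.apply(null)); // null: the new null passthrough
      }
    }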
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Dates.java b/api/src/main/java/org/apache/iceberg/transforms/Dates.java
index 94714d2..3b1c2ec 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Dates.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Dates.java
@@ -29,9 +29,6 @@ import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
-import static org.apache.iceberg.expressions.Expression.Operation.IS_NULL;
-import static org.apache.iceberg.expressions.Expression.Operation.NOT_NULL;
-
enum Dates implements Transform<Integer, Integer> {
YEAR(ChronoUnit.YEARS, "year"),
MONTH(ChronoUnit.MONTHS, "month"),
@@ -48,9 +45,14 @@ enum Dates implements Transform<Integer, Integer> {
@Override
public Integer apply(Integer days) {
+ if (days == null) {
+ return null;
+ }
+
if (granularity == ChronoUnit.DAYS) {
return days;
}
+
return (int) granularity.between(EPOCH, EPOCH.plusDays(days));
}
@@ -61,23 +63,30 @@ enum Dates implements Transform<Integer, Integer> {
@Override
public Type getResultType(Type sourceType) {
+ if (granularity == ChronoUnit.DAYS) {
+ return Types.DateType.get();
+ }
return Types.IntegerType.get();
}
@Override
public UnboundPredicate<Integer> project(String fieldName, BoundPredicate<Integer> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), fieldName);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateInteger(fieldName, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateInteger(fieldName, pred, this);
+ return null;
}
@Override
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Integer> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), fieldName);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateIntegerStrict(fieldName, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateIntegerStrict(fieldName, pred, this);
+ return null;
}
@Override
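With the getResultType change, day-granularity transforms now report DateType rather than a bare int, and apply() tolerates null inputs. A sketch via the string factory, assuming a fromString(Type, String) signature for the method whose body appears in the Transforms hunk below:

    import org.apache.iceberg.transforms.Transform;
    import org.apache.iceberg.transforms.Transforms;
    import org.apache.iceberg.types.Types;

    public class DayResultType {
      public static void main(String[] args) {
        Transform<?, ?> day = Transforms.fromString(Types.DateType.get(), "day");
        System.out.println(day.getResultType(Types.DateType.get())); // date, previously int
      }
    }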
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Identity.java b/api/src/main/java/org/apache/iceberg/transforms/Identity.java
index ef1037b..84bcce1 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Identity.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Identity.java
@@ -61,11 +61,14 @@ class Identity<T> implements Transform<T, T> {
@Override
public UnboundPredicate<T> projectStrict(String name, BoundPredicate<T> predicate) {
- if (predicate.literal() != null) {
- return Expressions.predicate(predicate.op(), name, predicate.literal().value());
- } else {
+ if (predicate.isUnaryPredicate()) {
return Expressions.predicate(predicate.op(), name);
+ } else if (predicate.isLiteralPredicate()) {
+ return Expressions.predicate(predicate.op(), name, predicate.asLiteralPredicate().literal().value());
+ } else if (predicate.isSetPredicate()) {
+ return Expressions.predicate(predicate.op(), name, predicate.asSetPredicate().literalSet());
}
+ return null;
}
@Override
@@ -108,8 +111,7 @@ class Identity<T> implements Transform<T, T> {
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof Identity)) {
return false;
}
diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
index ef1e0c3..42b5b47 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
@@ -21,7 +21,7 @@ package org.apache.iceberg.transforms;
import java.math.BigDecimal;
import java.math.BigInteger;
-import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.BoundLiteralPredicate;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.UnboundPredicate;
@@ -32,7 +32,7 @@ class ProjectionUtil {
private ProjectionUtil() {}
static <T> UnboundPredicate<T> truncateInteger(
- String name, BoundPredicate<Integer> pred, Transform<Integer, T> transform) {
+ String name, BoundLiteralPredicate<Integer> pred, Transform<Integer, T> transform) {
int boundary = pred.literal().value();
switch (pred.op()) {
case LT:
@@ -52,38 +52,18 @@ class ProjectionUtil {
}
}
- static UnboundPredicate<Integer> truncateIntegerStrict(
- String name, BoundPredicate<Integer> pred, Transform<Integer, Integer> transform) {
+ static <T> UnboundPredicate<T> truncateIntegerStrict(
+ String name, BoundLiteralPredicate<Integer> pred, Transform<Integer, T> transform) {
int boundary = pred.literal().value();
switch (pred.op()) {
case LT:
- // predicate would be <= the previous partition
- return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
+ return predicate(Expression.Operation.LT, name, transform.apply(boundary));
case LT_EQ:
- // Checking if the literal is at the upper partition boundary
- if (transform.apply(boundary + 1).equals(transform.apply(boundary))) {
- // Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000
- // the predicate can be < 2019-07-01
- return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
- } else {
- // Literal is not at upper boundary, for eg: 2019-07-02T23:59:59.99999
- // the predicate can be <= 2019-07-02
- return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
- }
+ return predicate(Expression.Operation.LT, name, transform.apply(boundary + 1));
case GT:
- // predicate would be >= the next partition
- return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
+ return predicate(Expression.Operation.GT, name, transform.apply(boundary));
case GT_EQ:
- // Checking if the literal is at the lower partition boundary
- if (transform.apply(boundary - 1).equals(transform.apply(boundary))) {
- // Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000
- // the predicate can be >= 2019-07-03
- return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
- } else {
- // Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000
- // the predicate can be >= 2019-07-02
- return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
- }
+ return predicate(Expression.Operation.GT, name, transform.apply(boundary - 1));
case NOT_EQ:
return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
case EQ:
@@ -94,38 +74,18 @@ class ProjectionUtil {
}
}
- static UnboundPredicate<Integer> truncateLongStrict(
- String name, BoundPredicate<Long> pred, Transform<Long, Integer> transform) {
+ static <T> UnboundPredicate<T> truncateLongStrict(
+ String name, BoundLiteralPredicate<Long> pred, Transform<Long, T> transform) {
long boundary = pred.literal().value();
switch (pred.op()) {
case LT:
- // predicate would be <= the previous partition
- return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
+ return predicate(Expression.Operation.LT, name, transform.apply(boundary));
case LT_EQ:
- // Checking if the literal is at the upper partition boundary
- if (transform.apply(boundary + 1L).equals(transform.apply(boundary))) {
- // Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000
- // the predicate can be <= 2019-07-01
- return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
- } else {
- // Literal is not at upper boundary, for eg: 2019-07-02T23:59:59.99999
- // the predicate can be <= 2019-07-02
- return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
- }
+ return predicate(Expression.Operation.LT, name, transform.apply(boundary + 1L));
case GT:
- // predicate would be >= the next partition
- return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
+ return predicate(Expression.Operation.GT, name, transform.apply(boundary));
case GT_EQ:
- // Checking if the literal is at the lower partition boundary
- if (transform.apply(boundary - 1L).equals(transform.apply(boundary))) {
- // Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000
- // the predicate can be >= 2019-07-03
- return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
- } else {
- // Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000
- // the predicate can be >= 2019-07-02
- return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
- }
+ return predicate(Expression.Operation.GT, name, transform.apply(boundary - 1L));
case NOT_EQ:
return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
case EQ:
@@ -137,7 +97,7 @@ class ProjectionUtil {
}
static <T> UnboundPredicate<T> truncateLong(
- String name, BoundPredicate<Long> pred, Transform<Long, T> transform) {
+ String name, BoundLiteralPredicate<Long> pred, Transform<Long, T> transform) {
long boundary = pred.literal().value();
switch (pred.op()) {
case LT:
@@ -158,7 +118,7 @@ class ProjectionUtil {
}
static <T> UnboundPredicate<T> truncateDecimal(
- String name, BoundPredicate<BigDecimal> pred,
+ String name, BoundLiteralPredicate<BigDecimal> pred,
Transform<BigDecimal, T> transform) {
BigDecimal boundary = pred.literal().value();
switch (pred.op()) {
@@ -185,8 +145,40 @@ class ProjectionUtil {
}
}
+ static <T> UnboundPredicate<T> truncateDecimalStrict(
+ String name, BoundLiteralPredicate<BigDecimal> pred,
+ Transform<BigDecimal, T> transform) {
+ BigDecimal boundary = pred.literal().value();
+
+ BigDecimal minusOne = new BigDecimal(
+ boundary.unscaledValue().subtract(BigInteger.ONE),
+ boundary.scale());
+
+ BigDecimal plusOne = new BigDecimal(
+ boundary.unscaledValue().add(BigInteger.ONE),
+ boundary.scale());
+
+ switch (pred.op()) {
+ case LT:
+ return predicate(Expression.Operation.LT, name, transform.apply(boundary));
+ case LT_EQ:
+ return predicate(Expression.Operation.LT, name, transform.apply(plusOne));
+ case GT:
+ return predicate(Expression.Operation.GT, name, transform.apply(boundary));
+ case GT_EQ:
+ return predicate(Expression.Operation.GT, name, transform.apply(minusOne));
+ case NOT_EQ:
+ return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
+ case EQ:
+ // there is no predicate that guarantees equality because adjacent decimals transform to the same value
+ return null;
+ default:
+ return null;
+ }
+ }
+
static <S, T> UnboundPredicate<T> truncateArray(
- String name, BoundPredicate<S> pred, Transform<S, T> transform) {
+ String name, BoundLiteralPredicate<S> pred, Transform<S, T> transform) {
S boundary = pred.literal().value();
switch (pred.op()) {
case LT:
@@ -197,8 +189,30 @@ class ProjectionUtil {
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
case EQ:
return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
-// case IN: // TODO
-// return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
+ case STARTS_WITH:
+ return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
+ // case IN: // TODO
+ // return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
+ default:
+ return null;
+ }
+ }
+
+ static <S, T> UnboundPredicate<T> truncateArrayStrict(
+ String name, BoundLiteralPredicate<S> pred, Transform<S, T> transform) {
+ S boundary = pred.literal().value();
+ switch (pred.op()) {
+ case LT:
+ case LT_EQ:
+ return predicate(Expression.Operation.LT, name, transform.apply(boundary));
+ case GT:
+ case GT_EQ:
+ return predicate(Expression.Operation.GT, name, transform.apply(boundary));
+ case NOT_EQ:
+ return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
+ case EQ:
+ // there is no predicate that guarantees equality because adjacent values transform to the same partition
+ return null;
default:
return null;
}
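The rewritten strict projections drop the old boundary-probing special cases in favor of a uniform rule: shift the boundary by one for inclusive operators, then emit the exclusive comparison on the transformed value. A worked trace for truncate with width 10 (numbers invented):

    // strict projection of predicates on x through x_trunc = truncate[10](x):
    //   x < 25   ->  x_trunc < truncate(25) = 20       (x_trunc <= 10 implies x <= 19 < 25)
    //   x <= 25  ->  x_trunc < truncate(25 + 1) = 20
    //   x > 25   ->  x_trunc > truncate(25) = 20       (x_trunc >= 30 implies x >= 30 > 25)
    //   x >= 25  ->  x_trunc > truncate(25 - 1) = 20
    //   x == 25  ->  no strict projection: all of 20..29 share the partition value 20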
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java
index 7259def..11a8db2 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java
@@ -29,9 +29,6 @@ import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
-import static org.apache.iceberg.expressions.Expression.Operation.IS_NULL;
-import static org.apache.iceberg.expressions.Expression.Operation.NOT_NULL;
-
enum Timestamps implements Transform<Long, Integer> {
YEAR(ChronoUnit.YEARS, "year"),
MONTH(ChronoUnit.MONTHS, "month"),
@@ -49,12 +46,16 @@ enum Timestamps implements Transform<Long, Integer> {
@Override
public Integer apply(Long timestampMicros) {
+ if (timestampMicros == null) {
+ return null;
+ }
+
// discards fractional seconds, not needed for calculation
OffsetDateTime timestamp = Instant
.ofEpochSecond(timestampMicros / 1_000_000)
.atOffset(ZoneOffset.UTC);
- Integer year = Long.valueOf(granularity.between(EPOCH, timestamp)).intValue();
- return year;
+
+ return (int) granularity.between(EPOCH, timestamp);
}
@Override
@@ -64,23 +65,30 @@ enum Timestamps implements Transform<Long, Integer> {
@Override
public Type getResultType(Type sourceType) {
+ if (granularity == ChronoUnit.DAYS) {
+ return Types.DateType.get();
+ }
return Types.IntegerType.get();
}
@Override
public UnboundPredicate<Integer> project(String fieldName, BoundPredicate<Long> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), fieldName);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateLong(fieldName, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateLong(fieldName, pred, this);
+ return null;
}
@Override
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Long> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), fieldName);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateLongStrict(fieldName, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateLongStrict(fieldName, pred, this);
+ return null;
}
@Override
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java
index 9bab11b..6a7b837 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Transform.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Transform.java
@@ -43,7 +43,7 @@ public interface Transform<S, T> extends Serializable {
T apply(S value);
/**
- * Checks whether this function can be applied to the give {@link Type}.
+ * Checks whether this function can be applied to the given {@link Type}.
*
* @param type a type
* @return true if this transform can be applied to the type, false otherwise
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java
index ff1eb32..ee92c29 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java
@@ -31,7 +31,7 @@ import org.apache.iceberg.types.Type;
* Factory methods for transforms.
* <p>
* Most users should create transforms using a
- * {@link PartitionSpec.Builder#builderFor(Schema)} partition spec builder}.
+ * {@link PartitionSpec#builderFor(Schema) partition spec builder}.
*
* @see PartitionSpec#builderFor(Schema) The partition spec builder.
*/
@@ -55,13 +55,19 @@ public class Transforms {
if (transform.equalsIgnoreCase("identity")) {
return Identity.get(type);
- } else if (type.typeId() == Type.TypeID.TIMESTAMP) {
- return Timestamps.valueOf(transform.toUpperCase(Locale.ENGLISH));
- } else if (type.typeId() == Type.TypeID.DATE) {
- return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH));
}
- throw new IllegalArgumentException("Unknown transform: " + transform);
+ try {
+ if (type.typeId() == Type.TypeID.TIMESTAMP) {
+ return Timestamps.valueOf(transform.toUpperCase(Locale.ENGLISH));
+ } else if (type.typeId() == Type.TypeID.DATE) {
+ return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH));
+ }
+ } catch (IllegalArgumentException ignored) {
+ // fall through to return unknown transform
+ }
+
+ return new UnknownTransform<>(type, transform);
}
/**
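An unrecognized transform name no longer aborts parsing; it becomes an UnknownTransform that remembers the name. A sketch, with the transform name invented and the same fromString(Type, String) signature assumed as above:

    import org.apache.iceberg.transforms.Transform;
    import org.apache.iceberg.transforms.Transforms;
    import org.apache.iceberg.types.Types;

    public class UnknownTransformExample {
      public static void main(String[] args) {
        Transform<?, ?> t = Transforms.fromString(Types.StringType.get(), "zorder");
        System.out.println(t.getClass().getSimpleName()); // UnknownTransform, instead of an exception
      }
    }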
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
index 2eacaa2..dc3ac6a 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
@@ -23,17 +23,15 @@ import com.google.common.base.Objects;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
+import org.apache.iceberg.expressions.BoundLiteralPredicate;
import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.BoundUnaryPredicate;
+import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.util.UnicodeUtil;
-import static org.apache.iceberg.expressions.Expression.Operation.IS_NULL;
-import static org.apache.iceberg.expressions.Expression.Operation.LT;
-import static org.apache.iceberg.expressions.Expression.Operation.LT_EQ;
-import static org.apache.iceberg.expressions.Expression.Operation.NOT_NULL;
-
abstract class Truncate<T> implements Transform<T, T> {
@SuppressWarnings("unchecked")
static <T> Truncate<T> get(Type type, int width) {
@@ -78,6 +76,10 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public Integer apply(Integer value) {
+ if (value == null) {
+ return null;
+ }
+
return value - (((value % width) + width) % width);
}
@@ -88,61 +90,32 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public UnboundPredicate<Integer> project(String name, BoundPredicate<Integer> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateInteger(name, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateInteger(name, pred, this);
+ return null;
}
@Override
- public UnboundPredicate<Integer> projectStrict(String name, BoundPredicate<Integer> predicate) {
+ public UnboundPredicate<Integer> projectStrict(String name, BoundPredicate<Integer> pred) {
// TODO: for integers, can this return the original predicate?
// No. the predicate needs to be in terms of the applied value. For all x, apply(x) <= x.
// Therefore, the lower bound can be transformed outside of a greater-than bound.
- int in;
- int out;
- int inImage;
- int outImage;
- switch (predicate.op()) {
- case LT:
- in = predicate.literal().value() - 1;
- out = predicate.literal().value();
- inImage = apply(in);
- outImage = apply(out);
- if (inImage != outImage) {
- return Expressions.predicate(LT_EQ, name, inImage);
- } else {
- return Expressions.predicate(LT, name, inImage);
- }
- case LT_EQ:
- in = predicate.literal().value();
- out = predicate.literal().value() + 1;
- inImage = apply(in);
- outImage = apply(out);
- if (inImage != outImage) {
- return Expressions.predicate(LT_EQ, name, inImage);
- } else {
- return Expressions.predicate(LT, name, inImage);
- }
- case GT:
- case GT_EQ:
- case EQ:
- case NOT_EQ:
-// case IN:
-// break;
-// case NOT_IN:
-// break;
- default:
- return null;
+ if (pred instanceof BoundUnaryPredicate) {
+ return Expressions.predicate(pred.op(), name);
+ } else if (pred instanceof BoundLiteralPredicate) {
+ return ProjectionUtil.truncateIntegerStrict(name, pred.asLiteralPredicate(), this);
}
+ return null;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof TruncateInteger)) {
return false;
}
@@ -175,6 +148,10 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public Long apply(Long value) {
+ if (value == null) {
+ return null;
+ }
+
return value - (((value % width) + width) % width);
}
@@ -185,14 +162,21 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public UnboundPredicate<Long> project(String name, BoundPredicate<Long> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateLong(name, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateLong(name, pred, this);
+ return null;
}
@Override
- public UnboundPredicate<Long> projectStrict(String name, BoundPredicate<Long> predicate) {
+ public UnboundPredicate<Long> projectStrict(String name, BoundPredicate<Long> pred) {
+ if (pred.isUnaryPredicate()) {
+ return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateLongStrict(name, pred.asLiteralPredicate(), this);
+ }
return null;
}
@@ -200,8 +184,7 @@ abstract class Truncate<T> implements Transform<T, T> {
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof TruncateLong)) {
return false;
}
@@ -234,6 +217,10 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public CharSequence apply(CharSequence value) {
+ if (value == null) {
+ return null;
+ }
+
return UnicodeUtil.truncateString(value, length);
}
@@ -244,16 +231,32 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public UnboundPredicate<CharSequence> project(String name,
- BoundPredicate<CharSequence> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
- return Expressions.predicate(pred.op(), name);
+ BoundPredicate<CharSequence> predicate) {
+ if (predicate.isUnaryPredicate()) {
+ return Expressions.predicate(predicate.op(), name);
+ } else if (predicate.isLiteralPredicate()) {
+ return ProjectionUtil.truncateArray(name, predicate.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateArray(name, pred, this);
+ return null;
}
@Override
public UnboundPredicate<CharSequence> projectStrict(String name,
- BoundPredicate<CharSequence> predicate) {
+ BoundPredicate<CharSequence> predicate) {
+ if (predicate instanceof BoundUnaryPredicate) {
+ return Expressions.predicate(predicate.op(), name);
+ } else if (predicate instanceof BoundLiteralPredicate) {
+ BoundLiteralPredicate<CharSequence> pred = predicate.asLiteralPredicate();
+ if (pred.op() == Expression.Operation.STARTS_WITH) {
+ if (pred.literal().value().length() < width()) {
+ return Expressions.predicate(pred.op(), name, pred.literal().value());
+ } else if (pred.literal().value().length() == width()) {
+ return Expressions.equal(name, pred.literal().value());
+ }
+ } else {
+ return ProjectionUtil.truncateArrayStrict(name, pred, this);
+ }
+ }
return null;
}
@@ -261,8 +264,7 @@ abstract class Truncate<T> implements Transform<T, T> {
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof TruncateString)) {
return false;
}
@@ -295,6 +297,10 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public ByteBuffer apply(ByteBuffer value) {
+ if (value == null) {
+ return null;
+ }
+
ByteBuffer ret = value.duplicate();
ret.limit(Math.min(value.limit(), value.position() + length));
return ret;
@@ -307,16 +313,23 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public UnboundPredicate<ByteBuffer> project(String name,
- BoundPredicate<ByteBuffer> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ BoundPredicate<ByteBuffer> pred) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateArray(name, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateArray(name, pred, this);
+ return null;
}
@Override
public UnboundPredicate<ByteBuffer> projectStrict(String name,
- BoundPredicate<ByteBuffer> predicate) {
+ BoundPredicate<ByteBuffer> pred) {
+ if (pred.isUnaryPredicate()) {
+ return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateArrayStrict(name, pred.asLiteralPredicate(), this);
+ }
return null;
}
@@ -324,8 +337,7 @@ abstract class Truncate<T> implements Transform<T, T> {
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof TruncateByteBuffer)) {
return false;
}
@@ -363,12 +375,17 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public BigDecimal apply(BigDecimal value) {
+ if (value == null) {
+ return null;
+ }
+
BigDecimal remainder = new BigDecimal(
value.unscaledValue()
.remainder(unscaledWidth)
.add(unscaledWidth)
.remainder(unscaledWidth),
value.scale());
+
return value.subtract(remainder);
}
@@ -379,16 +396,23 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public UnboundPredicate<BigDecimal> project(String name,
- BoundPredicate<BigDecimal> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
+ BoundPredicate<BigDecimal> pred) {
+ if (pred.isUnaryPredicate()) {
return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateDecimal(name, pred.asLiteralPredicate(), this);
}
- return ProjectionUtil.truncateDecimal(name, pred, this);
+ return null;
}
@Override
public UnboundPredicate<BigDecimal> projectStrict(String name,
- BoundPredicate<BigDecimal> predicate) {
+ BoundPredicate<BigDecimal> pred) {
+ if (pred.isUnaryPredicate()) {
+ return Expressions.predicate(pred.op(), name);
+ } else if (pred.isLiteralPredicate()) {
+ return ProjectionUtil.truncateDecimalStrict(name, pred.asLiteralPredicate(), this);
+ }
return null;
}
@@ -396,8 +420,7 @@ abstract class Truncate<T> implements Transform<T, T> {
public boolean equals(Object o) {
if (this == o) {
return true;
- }
- if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof TruncateDecimal)) {
return false;
}
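The null-safe apply() methods added above make every truncate transform pass null through instead of
throwing. A minimal sketch of the new behavior, assuming the Transforms.truncate(Type, int) factory
exposed elsewhere in this module (the demo class name is illustrative):

    import org.apache.iceberg.transforms.Transform;
    import org.apache.iceberg.transforms.Transforms;
    import org.apache.iceberg.types.Types;

    public class TruncateNullDemo {
      public static void main(String[] args) {
        // truncate strings to a width of 3
        Transform<CharSequence, CharSequence> truncate =
            Transforms.truncate(Types.StringType.get(), 3);
        System.out.println(truncate.apply("iceberg")); // "ice"
        System.out.println(truncate.apply(null));      // null, not a NullPointerException
      }
    }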
diff --git a/api/src/main/java/org/apache/iceberg/transforms/UnknownTransform.java b/api/src/main/java/org/apache/iceberg/transforms/UnknownTransform.java
new file mode 100644
index 0000000..9c5c8da
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/transforms/UnknownTransform.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import java.util.Objects;
+import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+public class UnknownTransform<S, T> implements Transform<S, T> {
+
+ private final Type sourceType;
+ private final String transform;
+
+ UnknownTransform(Type sourceType, String transform) {
+ this.sourceType = sourceType;
+ this.transform = transform;
+ }
+
+ @Override
+ public T apply(S value) {
+ throw new UnsupportedOperationException(String.format("Cannot apply unsupported transform: %s", transform));
+ }
+
+ @Override
+ public boolean canTransform(Type type) {
+ // assume the transform function can be applied to this type because an unknown transform is only used when
+ // parsing a transform in an existing table; a different Iceberg version must have already validated it.
+ return this.sourceType.equals(type);
+ }
+
+ @Override
+ public Type getResultType(Type type) {
+ // the actual result type is not known
+ return Types.StringType.get();
+ }
+
+ @Override
+ public UnboundPredicate<T> project(String name, BoundPredicate<S> predicate) {
+ return null;
+ }
+
+ @Override
+ public UnboundPredicate<T> projectStrict(String name, BoundPredicate<S> predicate) {
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return transform;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ } else if (!(other instanceof UnknownTransform)) {
+ return false;
+ }
+
+ UnknownTransform<?, ?> that = (UnknownTransform<?, ?>) other;
+ return sourceType.equals(that.sourceType) && transform.equals(that.transform);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(sourceType, transform);
+ }
+}
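UnknownTransform preserves forward compatibility: a table whose partition spec names a transform that
this version does not recognize can still be loaded. A short sketch of its contract, using a made-up
transform name ("zorder") and running inside the org.apache.iceberg.transforms package because the
constructor is package-private (the demo class name is illustrative):

    package org.apache.iceberg.transforms;

    import org.apache.iceberg.types.Types;

    class UnknownTransformDemo {
      public static void main(String[] args) {
        UnknownTransform<Object, Object> unknown =
            new UnknownTransform<>(Types.StringType.get(), "zorder");
        System.out.println(unknown.canTransform(Types.StringType.get()));  // true: assumed pre-validated
        System.out.println(unknown.getResultType(Types.StringType.get())); // string: real type unknown
        System.out.println(unknown.projectStrict("part", null));           // null: nothing can be projected
        unknown.apply("value"); // throws UnsupportedOperationException
      }
    }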
diff --git a/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java b/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
index 3888d39..89ea1b0 100644
--- a/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
+++ b/api/src/main/java/org/apache/iceberg/types/CheckCompatibility.java
@@ -32,13 +32,28 @@ import org.apache.iceberg.Schema;
public class CheckCompatibility extends TypeUtil.CustomOrderSchemaVisitor<List<String>> {
/**
* Returns a list of compatibility errors for writing with the given write schema.
+ * This includes nullability: writing optional (nullable) values to a required field is an error.
*
* @param readSchema a read schema
* @param writeSchema a write schema
* @return a list of error details, or an empty list if there are no compatibility problems
*/
public static List<String> writeCompatibilityErrors(Schema readSchema, Schema writeSchema) {
- return TypeUtil.visit(readSchema, new CheckCompatibility(writeSchema, true));
+ return TypeUtil.visit(readSchema, new CheckCompatibility(writeSchema, true, true));
+ }
+
+ /**
+ * Returns a list of compatibility errors for writing with the given write schema.
+ * This checks type compatibility and not nullability: writing optional (nullable) values
+ * to a required field is not an error. To check nullability as well as types,
+ * use {@link #writeCompatibilityErrors(Schema, Schema)}.
+ *
+ * @param readSchema a read schema
+ * @param writeSchema a write schema
+ * @return a list of error details, or an empty list if there are no compatibility problems
+ */
+ public static List<String> typeCompatibilityErrors(Schema readSchema, Schema writeSchema) {
+ return TypeUtil.visit(readSchema, new CheckCompatibility(writeSchema, true, false));
}
/**
@@ -49,20 +64,22 @@ public class CheckCompatibility extends TypeUtil.CustomOrderSchemaVisitor<List<S
* @return a list of error details, or an empty list if there are no compatibility problems
*/
public static List<String> readCompatibilityErrors(Schema readSchema, Schema writeSchema) {
- return TypeUtil.visit(readSchema, new CheckCompatibility(writeSchema, false));
+ return TypeUtil.visit(readSchema, new CheckCompatibility(writeSchema, false, true));
}
- private static final List<String> NO_ERRORS = ImmutableList.of();
+ private static final ImmutableList<String> NO_ERRORS = ImmutableList.of();
private final Schema schema;
private final boolean checkOrdering;
+ private final boolean checkNullability;
// the current file schema, maintained while traversing a write schema
private Type currentType;
- private CheckCompatibility(Schema schema, boolean checkOrdering) {
+ private CheckCompatibility(Schema schema, boolean checkOrdering, boolean checkNullability) {
this.schema = schema;
this.checkOrdering = checkOrdering;
+ this.checkNullability = checkNullability;
}
@Override
@@ -132,7 +149,7 @@ public class CheckCompatibility extends TypeUtil.CustomOrderSchemaVisitor<List<S
this.currentType = field.type();
try {
- if (readField.isRequired() && field.isOptional()) {
+ if (checkNullability && readField.isRequired() && field.isOptional()) {
errors.add(readField.name() + " should be required, but is optional");
}
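The split between the two checks above shows up with a pair of schemas that differ only in
nullability. A minimal sketch using only the factory methods added in this file (the demo class name
is illustrative):

    import java.util.List;
    import org.apache.iceberg.Schema;
    import org.apache.iceberg.types.CheckCompatibility;
    import org.apache.iceberg.types.Types;

    public class CompatibilityDemo {
      public static void main(String[] args) {
        Schema table = new Schema(Types.NestedField.required(1, "id", Types.LongType.get()));
        Schema writes = new Schema(Types.NestedField.optional(1, "id", Types.LongType.get()));

        // reports "id should be required, but is optional"
        List<String> strict = CheckCompatibility.writeCompatibilityErrors(table, writes);
        // empty: the types match and nullability is not checked
        List<String> typesOnly = CheckCompatibility.typeCompatibilityErrors(table, writes);
        System.out.println(strict);
        System.out.println(typesOnly);
      }
    }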
diff --git a/api/src/main/java/org/apache/iceberg/types/Conversions.java b/api/src/main/java/org/apache/iceberg/types/Conversions.java
index e2c728d..d0a2967 100644
--- a/api/src/main/java/org/apache/iceberg/types/Conversions.java
+++ b/api/src/main/java/org/apache/iceberg/types/Conversions.java
@@ -31,6 +31,7 @@ import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.UUID;
import org.apache.iceberg.exceptions.RuntimeIOException;
+import org.apache.iceberg.expressions.Literal;
public class Conversions {
@@ -51,7 +52,7 @@ public class Conversions {
case LONG:
return Long.valueOf(asString);
case FLOAT:
- return Long.valueOf(asString);
+ return Float.valueOf(asString);
case DOUBLE:
return Double.valueOf(asString);
case STRING:
@@ -66,6 +67,8 @@ public class Conversions {
return asString.getBytes(StandardCharsets.UTF_8);
case DECIMAL:
return new BigDecimal(asString);
+ case DATE:
+ return Literal.of(asString).to(Types.DateType.get()).value();
default:
throw new UnsupportedOperationException(
"Unsupported type for fromPartitionString: " + type);
@@ -78,7 +81,15 @@ public class Conversions {
ThreadLocal.withInitial(StandardCharsets.UTF_8::newDecoder);
public static ByteBuffer toByteBuffer(Type type, Object value) {
- switch (type.typeId()) {
+ return toByteBuffer(type.typeId(), value);
+ }
+
+ public static ByteBuffer toByteBuffer(Type.TypeID typeId, Object value) {
+ if (value == null) {
+ return null;
+ }
+
+ switch (typeId) {
case BOOLEAN:
return ByteBuffer.allocate(1).put(0, (Boolean) value ? (byte) 0x01 : (byte) 0x00);
case INTEGER:
@@ -110,7 +121,7 @@ public class Conversions {
case DECIMAL:
return ByteBuffer.wrap(((BigDecimal) value).unscaledValue().toByteArray());
default:
- throw new UnsupportedOperationException("Cannot serialize type: " + type);
+ throw new UnsupportedOperationException("Cannot serialize type: " + typeId);
}
}
@@ -120,6 +131,10 @@ public class Conversions {
}
private static Object internalFromByteBuffer(Type type, ByteBuffer buffer) {
+ if (buffer == null) {
+ return null;
+ }
+
ByteBuffer tmp = buffer.duplicate();
if (type == Types.UUIDType.get() || type instanceof Types.DecimalType) {
tmp.order(ByteOrder.BIG_ENDIAN);
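The new TypeID overload and the null checks make serialization usable when only a type id is at hand
and when bounds are missing. A small sketch (the demo class name is illustrative):

    import java.nio.ByteBuffer;
    import org.apache.iceberg.types.Conversions;
    import org.apache.iceberg.types.Type;
    import org.apache.iceberg.types.Types;

    public class ConversionsDemo {
      public static void main(String[] args) {
        ByteBuffer buf = Conversions.toByteBuffer(Type.TypeID.INTEGER, 34);
        System.out.println(Conversions.fromByteBuffer(Types.IntegerType.get(), buf)); // 34
        // null values now serialize to null instead of throwing
        System.out.println(Conversions.toByteBuffer(Types.IntegerType.get(), null));  // null
      }
    }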
diff --git a/api/src/main/java/org/apache/iceberg/types/IndexByName.java b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
index 112d16d..a9ad58e 100644
--- a/api/src/main/java/org/apache/iceberg/types/IndexByName.java
+++ b/api/src/main/java/org/apache/iceberg/types/IndexByName.java
@@ -20,53 +20,99 @@
package org.apache.iceberg.types;
import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import java.util.List;
+import java.util.Deque;
import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.function.Supplier;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.exceptions.ValidationException;
-public class IndexByName extends TypeUtil.SchemaVisitor<Map<String, Integer>> {
+public class IndexByName extends TypeUtil.CustomOrderSchemaVisitor<Map<String, Integer>> {
private static final Joiner DOT = Joiner.on(".");
+ private final Deque<String> fieldNames = Lists.newLinkedList();
private final Map<String, Integer> nameToId = Maps.newHashMap();
@Override
- public Map<String, Integer> schema(Schema schema, Map<String, Integer> structResult) {
- return nameToId;
+ public Map<String, Integer> schema(Schema schema, Supplier<Map<String, Integer>> structResult) {
+ return structResult.get();
}
@Override
- public Map<String, Integer> struct(Types.StructType struct, List<Map<String, Integer>> fieldResults) {
+ public Map<String, Integer> struct(Types.StructType struct, Iterable<Map<String, Integer>> fieldResults) {
+ // force evaluation of the field results so every field is indexed; calling size() on the
+ // materialized list avoids an errorprone failure for an ignored return value
+ Lists.newArrayList(fieldResults).size();
return nameToId;
}
@Override
- public Map<String, Integer> field(Types.NestedField field, Map<String, Integer> fieldResult) {
+ public Map<String, Integer> field(Types.NestedField field, Supplier<Map<String, Integer>> fieldResult) {
+ withName(field.name(), fieldResult::get);
addField(field.name(), field.fieldId());
return null;
}
@Override
- public Map<String, Integer> list(Types.ListType list, Map<String, Integer> elementResult) {
+ public Map<String, Integer> list(Types.ListType list, Supplier<Map<String, Integer>> elementResult) {
+ // add element
for (Types.NestedField field : list.fields()) {
addField(field.name(), field.fieldId());
}
+
+ if (list.elementType().isStructType()) {
+ // return to avoid errorprone failure
+ return elementResult.get();
+ }
+
+ withName("element", elementResult::get);
+
return null;
}
@Override
- public Map<String, Integer> map(Types.MapType map, Map<String, Integer> keyResult, Map<String, Integer> valueResult) {
+ public Map<String, Integer> map(Types.MapType map,
+ Supplier<Map<String, Integer>> keyResult,
+ Supplier<Map<String, Integer>> valueResult) {
+ withName("key", keyResult::get);
+
+ // add key and value
for (Types.NestedField field : map.fields()) {
addField(field.name(), field.fieldId());
}
+
+ if (map.valueType().isStructType()) {
+ // return to avoid errorprone failure
+ return valueResult.get();
+ }
+
+ withName("value", valueResult::get);
+
return null;
}
+ private <T> T withName(String name, Callable<T> callable) {
+ fieldNames.push(name);
+ try {
+ return callable.call();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ } finally {
+ fieldNames.pop();
+ }
+ }
+
private void addField(String name, int fieldId) {
String fullName = name;
- if (!fieldNames().isEmpty()) {
- fullName = DOT.join(DOT.join(fieldNames().descendingIterator()), name);
+ if (!fieldNames.isEmpty()) {
+ fullName = DOT.join(DOT.join(fieldNames.descendingIterator()), name);
+ }
+
+ Integer existingFieldId = nameToId.put(fullName, fieldId);
+ if (existingFieldId != null && !"element".equals(name) && !"value".equals(name)) {
+ throw new ValidationException(
+ "Invalid schema: multiple fields for name %s: %s and %s", fullName, existingFieldId, fieldId);
}
- nameToId.put(fullName, fieldId);
}
}
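The reworked visitor indexes nested fields under dotted paths and now rejects schemas that map one
name to two ids. A short sketch of the happy path (the demo class name is illustrative):

    import java.util.Map;
    import org.apache.iceberg.Schema;
    import org.apache.iceberg.types.IndexByName;
    import org.apache.iceberg.types.TypeUtil;
    import org.apache.iceberg.types.Types;

    public class IndexByNameDemo {
      public static void main(String[] args) {
        Schema schema = new Schema(
            Types.NestedField.required(1, "id", Types.LongType.get()),
            Types.NestedField.optional(2, "location", Types.StructType.of(
                Types.NestedField.required(3, "lat", Types.DoubleType.get()))));

        Map<String, Integer> nameToId = TypeUtil.visit(schema, new IndexByName());
        System.out.println(nameToId.get("location.lat")); // 3
      }
    }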
diff --git a/api/src/main/java/org/apache/iceberg/types/ReassignIds.java b/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
index 2a73145..1609949 100644
--- a/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
+++ b/api/src/main/java/org/apache/iceberg/types/ReassignIds.java
@@ -73,6 +73,9 @@ class ReassignIds extends TypeUtil.CustomOrderSchemaVisitor<Type> {
Types.StructType sourceStruct = sourceType.asStructType();
Types.NestedField sourceField = sourceStruct.field(field.name());
+ if (sourceField == null) {
+ throw new IllegalArgumentException("Field " + field.name() + " not found in source schema");
+ }
this.sourceType = sourceField.type();
try {
diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
index e3267dd..e76d082 100644
--- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
+++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -30,6 +30,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.function.Supplier;
import org.apache.iceberg.Schema;
@@ -111,7 +112,7 @@ public class TypeUtil {
*
* @param schema a schema
* @param nextId an id assignment function
- * @return an structurally identical schema with new ids assigned by the nextId function
+ * @return a structurally identical schema with new ids assigned by the nextId function
*/
public static Schema assignFreshIds(Schema schema, NextID nextId) {
return new Schema(TypeUtil
@@ -121,6 +122,17 @@ public class TypeUtil {
}
/**
+ * Assigns strictly increasing fresh ids for all fields in a schema, starting from 1.
+ *
+ * @param schema a schema
+ * @return a structurally identical schema with new ids assigned strictly increasing from 1
+ */
+ public static Schema assignIncreasingFreshIds(Schema schema) {
+ AtomicInteger lastColumnId = new AtomicInteger(0);
+ return TypeUtil.assignFreshIds(schema, lastColumnId::incrementAndGet);
+ }
+
+ /**
* Reassigns ids in a schema from another schema.
* <p>
* Ids are determined by field names. If a field in the schema cannot be found in the source
@@ -178,7 +190,6 @@ public class TypeUtil {
}
public static class SchemaVisitor<T> {
- private final Deque<String> fieldNames = Lists.newLinkedList();
private final Deque<Integer> fieldIds = Lists.newLinkedList();
public T schema(Schema schema, T structResult) {
@@ -205,10 +216,6 @@ public class TypeUtil {
return null;
}
- protected Deque<String> fieldNames() {
- return fieldNames;
- }
-
protected Deque<Integer> fieldIds() {
return fieldIds;
}
@@ -225,13 +232,11 @@ public class TypeUtil {
List<T> results = Lists.newArrayListWithExpectedSize(struct.fields().size());
for (Types.NestedField field : struct.fields()) {
visitor.fieldIds.push(field.fieldId());
- visitor.fieldNames.push(field.name());
T result;
try {
result = visit(field.type(), visitor);
} finally {
visitor.fieldIds.pop();
- visitor.fieldNames.pop();
}
results.add(visitor.field(field, result));
}
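assignIncreasingFreshIds is a convenience wrapper over assignFreshIds with a counter that starts at
1, useful when importing a schema whose ids are arbitrary. A brief usage sketch (the demo class name
is illustrative):

    import org.apache.iceberg.Schema;
    import org.apache.iceberg.types.TypeUtil;
    import org.apache.iceberg.types.Types;

    public class FreshIdsDemo {
      public static void main(String[] args) {
        Schema schema = new Schema(
            Types.NestedField.required(10, "id", Types.LongType.get()),
            Types.NestedField.optional(20, "data", Types.StringType.get()));
        // the output schema is renumbered 1, 2, ... regardless of the input ids
        System.out.println(TypeUtil.assignIncreasingFreshIds(schema));
      }
    }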
diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java
index 9eca4db..f7e91a0 100644
--- a/api/src/main/java/org/apache/iceberg/types/Types.java
+++ b/api/src/main/java/org/apache/iceberg/types/Types.java
@@ -247,7 +247,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof TimestampType)) {
return false;
}
@@ -326,7 +326,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof FixedType)) {
return false;
}
@@ -395,7 +395,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof DecimalType)) {
return false;
}
@@ -429,6 +429,14 @@ public class Types {
return new NestedField(false, id, name, type, doc);
}
+ public static NestedField of(int id, boolean isOptional, String name, Type type) {
+ return new NestedField(isOptional, id, name, type, null);
+ }
+
+ public static NestedField of(int id, boolean isOptional, String name, Type type, String doc) {
+ return new NestedField(isOptional, id, name, type, doc);
+ }
+
private final boolean isOptional;
private final int id;
private final String name;
@@ -480,7 +488,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof NestedField)) {
return false;
}
@@ -581,7 +589,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof StructType)) {
return false;
}
@@ -716,7 +724,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof ListType)) {
return false;
}
@@ -834,7 +842,7 @@ public class Types {
public boolean equals(Object o) {
if (this == o) {
return true;
- } else if (o == null || getClass() != o.getClass()) {
+ } else if (!(o instanceof MapType)) {
return false;
}
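The new NestedField.of factories take nullability as a flag, which reads better when optionality is
computed rather than hard-coded. A brief sketch (the demo class name is illustrative):

    import org.apache.iceberg.types.Types;
    import org.apache.iceberg.types.Types.NestedField;

    public class NestedFieldDemo {
      public static void main(String[] args) {
        boolean isOptional = true; // e.g. derived from an external schema
        NestedField field = NestedField.of(1, isOptional, "data", Types.StringType.get());
        System.out.println(field); // same field as NestedField.optional(1, "data", string)
      }
    }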
diff --git a/api/src/main/java/org/apache/iceberg/util/BinaryUtil.java b/api/src/main/java/org/apache/iceberg/util/BinaryUtil.java
index fdfe751..9b39481 100644
--- a/api/src/main/java/org/apache/iceberg/util/BinaryUtil.java
+++ b/api/src/main/java/org/apache/iceberg/util/BinaryUtil.java
@@ -32,8 +32,10 @@ public class BinaryUtil {
* Truncates the input byte buffer to the given length
*/
public static ByteBuffer truncateBinary(ByteBuffer input, int length) {
- Preconditions.checkArgument(length > 0 && length < input.remaining(),
- "Truncate length should be positive and lower than the number of remaining elements");
+ Preconditions.checkArgument(length > 0, "Truncate length should be positive");
+ if (length >= input.remaining()) {
+ return input;
+ }
byte[] array = new byte[length];
input.duplicate().get(array);
return ByteBuffer.wrap(array);
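truncateBinary no longer rejects lengths that meet or exceed the buffer's remaining bytes; it returns
the input unchanged, so callers can truncate bounds unconditionally. A quick sketch (the demo class
name is illustrative):

    import java.nio.ByteBuffer;
    import org.apache.iceberg.util.BinaryUtil;

    public class TruncateBinaryDemo {
      public static void main(String[] args) {
        ByteBuffer input = ByteBuffer.wrap(new byte[] {1, 2, 3});
        System.out.println(BinaryUtil.truncateBinary(input, 2).remaining()); // 2
        System.out.println(BinaryUtil.truncateBinary(input, 10) == input);   // true: returned as-is
      }
    }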
diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
new file mode 100644
index 0000000..b51ff34
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.util;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Sets;
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Set;
+
+public class CharSequenceSet implements Set<CharSequence>, Serializable {
+ public static Set<CharSequence> of(Iterable<CharSequence> charSequences) {
+ return new CharSequenceSet(charSequences);
+ }
+
+ private final Set<CharSequenceWrapper> wrapperSet;
+ private final CharSequenceWrapper containsWrapper = CharSequenceWrapper.wrap(null);
+
+ private CharSequenceSet(Iterable<CharSequence> charSequences) {
+ this.wrapperSet = Sets.newHashSet(Iterables.transform(charSequences, CharSequenceWrapper::wrap));
+ }
+
+ @Override
+ public int size() {
+ return wrapperSet.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return wrapperSet.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object obj) {
+ if (obj instanceof CharSequence) {
+ return wrapperSet.contains(containsWrapper.set((CharSequence) obj));
+ }
+ return false;
+ }
+
+ @Override
+ public Iterator<CharSequence> iterator() {
+ return Iterators.transform(wrapperSet.iterator(), CharSequenceWrapper::get);
+ }
+
+ @Override
+ public Object[] toArray() {
+ return Iterators.toArray(iterator(), CharSequence.class);
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T> T[] toArray(T[] destArray) {
+ int size = wrapperSet.size();
+ if (destArray.length < size) {
+ return (T[]) toArray();
+ }
+
+ Iterator<CharSequence> iter = iterator();
+ int ind = 0;
+ while (iter.hasNext()) {
+ destArray[ind] = (T) iter.next();
+ ind += 1;
+ }
+
+ if (destArray.length > size) {
+ destArray[size] = null;
+ }
+
+ return destArray;
+ }
+
+ @Override
+ public boolean add(CharSequence charSequence) {
+ return wrapperSet.add(CharSequenceWrapper.wrap(charSequence));
+ }
+
+ @Override
+ public boolean remove(Object obj) {
+ if (obj instanceof CharSequence) {
+ return wrapperSet.remove(containsWrapper.set((CharSequence) obj));
+ }
+ return false;
+ }
+
+ @Override
+ public boolean containsAll(Collection<?> objects) {
+ if (objects != null) {
+ return Iterables.all(objects, this::contains);
+ }
+ return false;
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends CharSequence> charSequences) {
+ if (charSequences != null) {
+ return Iterables.addAll(wrapperSet, Iterables.transform(charSequences, CharSequenceWrapper::wrap));
+ }
+ return false;
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ if (objects != null) {
+ return Iterables.retainAll(wrapperSet, objects); // retainAll, not removeAll: keep only the given objects
+ }
+ return false;
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ if (objects != null) {
+ return Iterables.removeAll(wrapperSet, objects);
+ }
+ return false;
+ }
+
+ @Override
+ public void clear() {
+ wrapperSet.clear();
+ }
+}
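CharSequenceSet gives CharSequence keys value semantics, so a String and a StringBuilder with the
same characters hit the same entry. A usage sketch (the demo class name is illustrative):

    import java.util.Arrays;
    import java.util.Set;
    import org.apache.iceberg.util.CharSequenceSet;

    public class CharSequenceSetDemo {
      public static void main(String[] args) {
        Set<CharSequence> paths =
            CharSequenceSet.of(Arrays.<CharSequence>asList("s3://bucket/a", "s3://bucket/b"));
        System.out.println(paths.contains(new StringBuilder("s3://bucket/a"))); // true
        System.out.println(paths.contains("s3://bucket/c"));                    // false
      }
    }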
diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
new file mode 100644
index 0000000..0ec95b8
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.util;
+
+import java.io.Serializable;
+import org.apache.iceberg.types.Comparators;
+
+/**
+ * Wrapper class to adapt CharSequence for use in maps and sets.
+ */
+public class CharSequenceWrapper implements CharSequence, Serializable {
+ public static CharSequenceWrapper wrap(CharSequence seq) {
+ return new CharSequenceWrapper(seq);
+ }
+
+ private CharSequence wrapped;
+
+ private CharSequenceWrapper(CharSequence wrapped) {
+ this.wrapped = wrapped;
+ }
+
+ public CharSequenceWrapper set(CharSequence newWrapped) {
+ this.wrapped = newWrapped;
+ return this;
+ }
+
+ public CharSequence get() {
+ return wrapped;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ } else if (!(other instanceof CharSequenceWrapper)) {
+ return false;
+ }
+
+ CharSequenceWrapper that = (CharSequenceWrapper) other;
+ return Comparators.charSequences().compare(wrapped, that.wrapped) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = 177;
+ for (int i = 0; i < wrapped.length(); i += 1) {
+ char ch = wrapped.charAt(i);
+ result = 31 * result + (int) ch;
+ }
+ return result;
+ }
+
+ @Override
+ public int length() {
+ return wrapped.length();
+ }
+
+ @Override
+ public char charAt(int index) {
+ return wrapped.charAt(index);
+ }
+
+ @Override
+ public CharSequence subSequence(int start, int end) {
+ return wrapped.subSequence(start, end);
+ }
+
+ @Override
+ public String toString() {
+ return wrapped.toString();
+ }
+}
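The wrapper's mutable set() exists so that lookup-heavy callers, such as CharSequenceSet.contains
above, can reuse one instance instead of allocating per probe. A short sketch (the demo class name is
illustrative):

    import com.google.common.collect.Sets;
    import java.util.Set;
    import org.apache.iceberg.util.CharSequenceWrapper;

    public class CharSequenceWrapperDemo {
      public static void main(String[] args) {
        Set<CharSequenceWrapper> seen = Sets.newHashSet(CharSequenceWrapper.wrap("a"));
        CharSequenceWrapper probe = CharSequenceWrapper.wrap(null); // reused for every lookup
        System.out.println(seen.contains(probe.set("a")));                    // true
        System.out.println(seen.contains(probe.set(new StringBuilder("a")))); // true: content equality
      }
    }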
diff --git a/api/src/main/java/org/apache/iceberg/util/UnicodeUtil.java b/api/src/main/java/org/apache/iceberg/util/UnicodeUtil.java
index f76ec73..c371d4a 100644
--- a/api/src/main/java/org/apache/iceberg/util/UnicodeUtil.java
+++ b/api/src/main/java/org/apache/iceberg/util/UnicodeUtil.java
@@ -41,7 +41,7 @@ public class UnicodeUtil {
*/
public static CharSequence truncateString(CharSequence input, int length) {
Preconditions.checkArgument(length > 0, "Truncate length should be positive");
- StringBuffer sb = new StringBuffer(input);
+ StringBuilder sb = new StringBuilder(input);
// Get the number of unicode characters in the input
int numUniCodeCharacters = sb.codePointCount(0, sb.length());
// No need to truncate if the number of unicode characters in the char sequence is <= truncate length
@@ -70,24 +70,24 @@ public class UnicodeUtil {
public static Literal<CharSequence> truncateStringMax(Literal<CharSequence> input, int length) {
CharSequence inputCharSeq = input.value();
// Truncate the input to the specified truncate length.
- StringBuffer truncatedStringBuffer = new StringBuffer(truncateString(inputCharSeq, length));
+ StringBuilder truncatedStringBuilder = new StringBuilder(truncateString(inputCharSeq, length));
// No need to increment if the input length is under the truncate length
- if (inputCharSeq.length() == truncatedStringBuffer.length()) {
+ if (inputCharSeq.length() == truncatedStringBuilder.length()) {
return input;
}
// Try incrementing the code points from the end
for (int i = length - 1; i >= 0; i--) {
// Get the offset in the truncated string buffer where the number of unicode characters = i
- int offsetByCodePoint = truncatedStringBuffer.offsetByCodePoints(0, i);
- int nextCodePoint = truncatedStringBuffer.codePointAt(offsetByCodePoint) + 1;
+ int offsetByCodePoint = truncatedStringBuilder.offsetByCodePoints(0, i);
+ int nextCodePoint = truncatedStringBuilder.codePointAt(offsetByCodePoint) + 1;
// No overflow
if (nextCodePoint != 0 && Character.isValidCodePoint(nextCodePoint)) {
- truncatedStringBuffer.setLength(offsetByCodePoint);
+ truncatedStringBuilder.setLength(offsetByCodePoint);
// Append next code point to the truncated substring
- truncatedStringBuffer.appendCodePoint(nextCodePoint);
- return Literal.of(truncatedStringBuffer.toString());
+ truncatedStringBuilder.appendCodePoint(nextCodePoint);
+ return Literal.of(truncatedStringBuilder.toString());
}
}
return null; // Cannot find a valid upper bound
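truncateStringMax produces an upper bound for a string by truncating and then incrementing the last
code point, returning null only when every candidate code point overflows. A small sketch (the demo
class name is illustrative):

    import org.apache.iceberg.expressions.Literal;
    import org.apache.iceberg.util.UnicodeUtil;

    public class TruncateStringMaxDemo {
      public static void main(String[] args) {
        Literal<CharSequence> input = Literal.of("iceberg");
        // truncate to 3 chars and bump the last code point: "ice" -> "icf", an upper bound for "iceberg"
        System.out.println(UnicodeUtil.truncateStringMax(input, 3).value()); // icf
      }
    }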
diff --git a/core/src/test/java/org/apache/iceberg/AssertHelpers.java b/api/src/test/java/org/apache/iceberg/AssertHelpers.java
similarity index 100%
rename from core/src/test/java/org/apache/iceberg/AssertHelpers.java
rename to api/src/test/java/org/apache/iceberg/AssertHelpers.java
diff --git a/api/src/test/java/org/apache/iceberg/TestHelpers.java b/api/src/test/java/org/apache/iceberg/TestHelpers.java
index 41eceb5..8ed4598 100644
--- a/api/src/test/java/org/apache/iceberg/TestHelpers.java
+++ b/api/src/test/java/org/apache/iceberg/TestHelpers.java
@@ -27,8 +27,8 @@ import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
-import java.util.concurrent.Callable;
import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.BoundSetPredicate;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.ExpressionVisitors;
import org.apache.iceberg.expressions.UnboundPredicate;
@@ -52,6 +52,13 @@ public class TestHelpers {
}
@SuppressWarnings("unchecked")
+ public static <T> BoundSetPredicate<T> assertAndUnwrapBoundSet(Expression expr) {
+ Assert.assertTrue("Expression should be a bound set predicate: " + expr,
+ expr instanceof BoundSetPredicate);
+ return (BoundSetPredicate<T>) expr;
+ }
+
+ @SuppressWarnings("unchecked")
public static <T> UnboundPredicate<T> assertAndUnwrapUnbound(Expression expr) {
Assert.assertTrue("Expression should be an unbound predicate: " + expr,
expr instanceof UnboundPredicate);
@@ -120,65 +127,6 @@ public class TestHelpers {
}
}
- /**
- * A convenience method to avoid a large number of @Test(expected=...) tests
- * @param message A String message to describe this assertion
- * @param expected An Exception class that the Runnable should throw
- * @param containedInMessage A String that should be contained by the thrown
- * exception's message
- * @param callable A Callable that is expected to throw the exception
- */
- public static void assertThrows(String message,
- Class<? extends Exception> expected,
- String containedInMessage,
- Callable callable) {
- try {
- callable.call();
- Assert.fail("No exception was thrown (" + message + "), expected: " +
- expected.getName());
- } catch (Exception actual) {
- handleException(message, expected, containedInMessage, actual);
- }
- }
-
- /**
- * A convenience method to avoid a large number of @Test(expected=...) tests
- * @param message A String message to describe this assertion
- * @param expected An Exception class that the Runnable should throw
- * @param containedInMessage A String that should be contained by the thrown
- * exception's message
- * @param runnable A Runnable that is expected to throw the runtime exception
- */
- public static void assertThrows(String message,
- Class<? extends Exception> expected,
- String containedInMessage,
- Runnable runnable) {
- try {
- runnable.run();
- Assert.fail("No exception was thrown (" + message + "), expected: " +
- expected.getName());
- } catch (Exception actual) {
- handleException(message, expected, containedInMessage, actual);
- }
- }
-
- private static void handleException(String message,
- Class<? extends Exception> expected,
- String containedInMessage,
- Exception actual) {
- try {
- Assert.assertEquals(message, expected, actual.getClass());
- Assert.assertTrue(
- "Expected exception message (" + containedInMessage + ") missing: " +
- actual.getMessage(),
- actual.getMessage().contains(containedInMessage)
- );
- } catch (AssertionError e) {
- e.addSuppressed(actual);
- throw e;
- }
- }
-
public static class TestFieldSummary implements ManifestFile.PartitionFieldSummary {
private final boolean containsNull;
private final ByteBuffer lowerBound;
diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
index 24a5716..48e2d75 100644
--- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
+++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
@@ -21,6 +21,7 @@ package org.apache.iceberg;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.NestedField;
+import org.junit.Assert;
import org.junit.Test;
public class TestPartitionSpecValidation {
@@ -29,66 +30,67 @@ public class TestPartitionSpecValidation {
NestedField.required(2, "ts", Types.TimestampType.withZone()),
NestedField.required(3, "another_ts", Types.TimestampType.withZone()),
NestedField.required(4, "d", Types.TimestampType.withZone()),
- NestedField.required(5, "another_d", Types.TimestampType.withZone())
+ NestedField.required(5, "another_d", Types.TimestampType.withZone()),
+ NestedField.required(6, "s", Types.StringType.get())
);
@Test
public void testMultipleTimestampPartitions() {
- TestHelpers.assertThrows("Should not allow year(ts) and year(ts)",
+ AssertHelpers.assertThrows("Should not allow year(ts) and year(ts)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).year("ts").year("ts").build());
- TestHelpers.assertThrows("Should not allow year(ts) and month(ts)",
+ AssertHelpers.assertThrows("Should not allow year(ts) and month(ts)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).year("ts").month("ts").build());
- TestHelpers.assertThrows("Should not allow year(ts) and day(ts)",
+ AssertHelpers.assertThrows("Should not allow year(ts) and day(ts)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).year("ts").day("ts").build());
- TestHelpers.assertThrows("Should not allow year(ts) and hour(ts)",
+ AssertHelpers.assertThrows("Should not allow year(ts) and hour(ts)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).year("ts").hour("ts").build());
- TestHelpers.assertThrows("Should not allow month(ts) and month(ts)",
+ AssertHelpers.assertThrows("Should not allow month(ts) and month(ts)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).month("ts").month("ts").build());
- TestHelpers.assertThrows("Should not allow month(ts) and day(ts)",
+ AssertHelpers.assertThrows("Should not allow month(ts) and day(ts)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).month("ts").day("ts").build());
- TestHelpers.assertThrows("Should not allow month(ts) and hour(ts)",
+ AssertHelpers.assertThrows("Should not allow month(ts) and hour(ts)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).month("ts").hour("ts").build());
- TestHelpers.assertThrows("Should not allow day(ts) and day(ts)",
+ AssertHelpers.assertThrows("Should not allow day(ts) and day(ts)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).day("ts").day("ts").build());
- TestHelpers.assertThrows("Should not allow day(ts) and hour(ts)",
+ AssertHelpers.assertThrows("Should not allow day(ts) and hour(ts)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).day("ts").hour("ts").build());
- TestHelpers.assertThrows("Should not allow hour(ts) and hour(ts)",
+ AssertHelpers.assertThrows("Should not allow hour(ts) and hour(ts)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).hour("ts").hour("ts").build());
}
@Test
public void testMultipleDatePartitions() {
- TestHelpers.assertThrows("Should not allow year(d) and year(d)",
+ AssertHelpers.assertThrows("Should not allow year(d) and year(d)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).year("d").year("d").build());
- TestHelpers.assertThrows("Should not allow year(d) and month(d)",
+ AssertHelpers.assertThrows("Should not allow year(d) and month(d)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).year("d").month("d").build());
- TestHelpers.assertThrows("Should not allow year(d) and day(d)",
+ AssertHelpers.assertThrows("Should not allow year(d) and day(d)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).year("d").day("d").build());
- TestHelpers.assertThrows("Should not allow month(d) and month(d)",
+ AssertHelpers.assertThrows("Should not allow month(d) and month(d)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).month("d").month("d").build());
- TestHelpers.assertThrows("Should not allow month(d) and day(d)",
+ AssertHelpers.assertThrows("Should not allow month(d) and day(d)",
IllegalArgumentException.class, "Cannot add redundant partition",
() -> PartitionSpec.builderFor(SCHEMA).month("d").day("d").build());
- TestHelpers.assertThrows("Should not allow day(d) and day(d)",
+ AssertHelpers.assertThrows("Should not allow day(d) and day(d)",
IllegalArgumentException.class, "Cannot use partition name more than once",
() -> PartitionSpec.builderFor(SCHEMA).day("d").day("d").build());
}
@@ -121,27 +123,85 @@ public class TestPartitionSpecValidation {
PartitionSpec.builderFor(SCHEMA).hour("d").hour("another_d").build();
}
+
+ @Test
+ public void testSettingPartitionTransformsWithCustomTargetNames() {
+ Assert.assertEquals("custom_year", PartitionSpec.builderFor(SCHEMA)
+ .year("ts", "custom_year").build().fields().get(0).name());
+ Assert.assertEquals("custom_month", PartitionSpec.builderFor(SCHEMA)
+ .month("ts", "custom_month").build().fields().get(0).name());
+ Assert.assertEquals("custom_day", PartitionSpec.builderFor(SCHEMA)
+ .day("ts", "custom_day").build().fields().get(0).name());
+ Assert.assertEquals("custom_hour", PartitionSpec.builderFor(SCHEMA)
+ .hour("ts", "custom_hour").build().fields().get(0).name());
+ Assert.assertEquals("custom_bucket", PartitionSpec.builderFor(SCHEMA)
+ .bucket("ts", 4, "custom_bucket")
+ .build().fields().get(0).name());
+ Assert.assertEquals("custom_truncate", PartitionSpec.builderFor(SCHEMA)
+ .truncate("s", 1, "custom_truncate")
+ .build().fields().get(0).name());
+ }
+
+ @Test
+ public void testSettingPartitionTransformsWithCustomTargetNamesThatAlreadyExist() {
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).year("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).month("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).day("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).hour("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).truncate("ts", 2, "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).bucket("ts", 4, "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name sourced from a different column",
+ IllegalArgumentException.class,
+ "Cannot create identity partition sourced from different field in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).identity("ts", "another_ts"));
+ }
+
@Test
public void testMissingSourceColumn() {
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).year("missing").build());
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).month("missing").build());
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).day("missing").build());
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).hour("missing").build());
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).bucket("missing", 4).build());
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).truncate("missing", 5).build());
- TestHelpers.assertThrows("Should detect missing source column",
+ AssertHelpers.assertThrows("Should detect missing source column",
IllegalArgumentException.class, "Cannot find source column",
() -> PartitionSpec.builderFor(SCHEMA).identity("missing").build());
}
diff --git a/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java
new file mode 100644
index 0000000..a0f1eab
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestTransformSerialization {
+ @Test
+ public void testTransforms() throws Exception {
+ Schema schema = new Schema(
+ Types.NestedField.required(1, "i", Types.IntegerType.get()),
+ Types.NestedField.required(2, "l", Types.LongType.get()),
+ Types.NestedField.required(3, "d", Types.DateType.get()),
+ Types.NestedField.required(4, "t", Types.TimeType.get()),
+ Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()),
+ Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)),
+ Types.NestedField.required(7, "s", Types.StringType.get()),
+ Types.NestedField.required(8, "u", Types.UUIDType.get()),
+ Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)),
+ Types.NestedField.required(10, "b", Types.BinaryType.get())
+ );
+
+ // a spec with all of the allowed transform/type pairs
+ PartitionSpec[] specs = new PartitionSpec[] {
+ PartitionSpec.builderFor(schema).identity("i").build(),
+ PartitionSpec.builderFor(schema).identity("l").build(),
+ PartitionSpec.builderFor(schema).identity("d").build(),
+ PartitionSpec.builderFor(schema).identity("t").build(),
+ PartitionSpec.builderFor(schema).identity("ts").build(),
+ PartitionSpec.builderFor(schema).identity("dec").build(),
+ PartitionSpec.builderFor(schema).identity("s").build(),
+ PartitionSpec.builderFor(schema).identity("u").build(),
+ PartitionSpec.builderFor(schema).identity("f").build(),
+ PartitionSpec.builderFor(schema).identity("b").build(),
+ PartitionSpec.builderFor(schema).bucket("i", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("l", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("d", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("t", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("ts", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("dec", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("s", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("u", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("f", 128).build(),
+ PartitionSpec.builderFor(schema).bucket("b", 128).build(),
+ PartitionSpec.builderFor(schema).year("d").build(),
+ PartitionSpec.builderFor(schema).month("d").build(),
+ PartitionSpec.builderFor(schema).day("d").build(),
+ PartitionSpec.builderFor(schema).year("ts").build(),
+ PartitionSpec.builderFor(schema).month("ts").build(),
+ PartitionSpec.builderFor(schema).day("ts").build(),
+ PartitionSpec.builderFor(schema).hour("ts").build(),
+ PartitionSpec.builderFor(schema).truncate("i", 10).build(),
+ PartitionSpec.builderFor(schema).truncate("l", 10).build(),
+ PartitionSpec.builderFor(schema).truncate("dec", 10).build(),
+ PartitionSpec.builderFor(schema).truncate("s", 10).build(),
+ PartitionSpec.builderFor(schema).add(6, "dec_unsupported", "unsupported").build(),
+ };
+
+ for (PartitionSpec spec : specs) {
+ Assert.assertEquals("Deserialization should produce equal partition spec",
+ spec, TestHelpers.roundTripSerialize(spec));
+ }
+ }
+}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java
index 42adb9a..a437f74 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java
@@ -19,7 +19,11 @@
package org.apache.iceberg.expressions;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
import org.apache.avro.util.Utf8;
+import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.types.Types;
@@ -33,20 +37,23 @@ import static org.apache.iceberg.expressions.Expressions.and;
import static org.apache.iceberg.expressions.Expressions.equal;
import static org.apache.iceberg.expressions.Expressions.greaterThan;
import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.in;
import static org.apache.iceberg.expressions.Expressions.isNull;
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.notIn;
import static org.apache.iceberg.expressions.Expressions.notNull;
import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.predicate;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
public class TestEvaluator {
private static final StructType STRUCT = StructType.of(
required(13, "x", Types.IntegerType.get()),
- required(14, "y", Types.IntegerType.get()),
+ required(14, "y", Types.DoubleType.get()),
optional(15, "z", Types.IntegerType.get()),
optional(16, "s1", Types.StructType.of(
Types.NestedField.required(17, "s2", Types.StructType.of(
@@ -163,6 +170,8 @@ public class TestEvaluator {
@Test
public void testEqual() {
+ Assert.assertEquals(1, equal("x", 5).literals().size());
+
Evaluator evaluator = new Evaluator(STRUCT, equal("x", 7));
Assert.assertTrue("7 == 7 => true", evaluator.eval(TestHelpers.Row.of(7, 8, null)));
Assert.assertFalse("6 == 7 => false", evaluator.eval(TestHelpers.Row.of(6, 8, null)));
@@ -184,6 +193,8 @@ public class TestEvaluator {
@Test
public void testNotEqual() {
+ Assert.assertEquals(1, notEqual("x", 5).literals().size());
+
Evaluator evaluator = new Evaluator(STRUCT, notEqual("x", 7));
Assert.assertFalse("7 != 7 => false", evaluator.eval(TestHelpers.Row.of(7, 8, null)));
Assert.assertTrue("6 != 7 => true", evaluator.eval(TestHelpers.Row.of(6, 8, null)));
@@ -346,7 +357,7 @@ public class TestEvaluator {
@Test
public void testCaseSensitiveNot() {
- TestHelpers.assertThrows(
+ AssertHelpers.assertThrows(
"X != x when case sensitivity is on",
ValidationException.class,
"Cannot find field 'X' in struct",
@@ -362,4 +373,198 @@ public class TestEvaluator {
Assert.assertFalse("string(abc) == utf8(abcd) => false",
evaluator.eval(TestHelpers.Row.of(new Utf8("abcd"))));
}
+
+ @Test
+ public void testIn() {
+ Assert.assertEquals(3, in("s", 7, 8, 9).literals().size());
+ Assert.assertEquals(3, in("s", 7, 8.1, Long.MAX_VALUE).literals().size());
+ Assert.assertEquals(3, in("s", "abc", "abd", "abc").literals().size());
+ Assert.assertEquals(0, in("s").literals().size());
+ Assert.assertEquals(1, in("s", 5).literals().size());
+ Assert.assertEquals(2, in("s", 5, 5).literals().size());
+ Assert.assertEquals(2, in("s", Arrays.asList(5, 5)).literals().size());
+ Assert.assertEquals(0, in("s", Collections.emptyList()).literals().size());
+
+ Evaluator evaluator = new Evaluator(STRUCT, in("x", 7, 8, Long.MAX_VALUE));
+ Assert.assertTrue("7 in [7, 8] => true", evaluator.eval(TestHelpers.Row.of(7, 8, null)));
+ Assert.assertFalse("9 in [7, 8] => false", evaluator.eval(TestHelpers.Row.of(9, 8, null)));
+
+ Evaluator intSetEvaluator = new Evaluator(STRUCT,
+ in("x", Long.MAX_VALUE, Integer.MAX_VALUE, Long.MIN_VALUE));
+ Assert.assertTrue("Integer.MAX_VALUE in [Integer.MAX_VALUE] => true",
+ intSetEvaluator.eval(TestHelpers.Row.of(Integer.MAX_VALUE, 7.0, null)));
+ Assert.assertFalse("6 in [Integer.MAX_VALUE] => false",
+ intSetEvaluator.eval(TestHelpers.Row.of(6, 6.8, null)));
+
+ Evaluator integerEvaluator = new Evaluator(STRUCT, in("y", 7, 8, 9.1));
+ Assert.assertTrue("7.0 in [7, 8, 9.1] => true",
+ integerEvaluator.eval(TestHelpers.Row.of(0, 7.0, null)));
+ Assert.assertTrue("9.1 in [7, 8, 9.1] => true",
+ integerEvaluator.eval(TestHelpers.Row.of(7, 9.1, null)));
+ Assert.assertFalse("6.8 in [7, 8, 9.1] => false",
+ integerEvaluator.eval(TestHelpers.Row.of(6, 6.8, null)));
+
+ Evaluator structEvaluator = new Evaluator(STRUCT, in("s1.s2.s3.s4.i", 7, 8, 9));
+ Assert.assertTrue("7 in [7, 8, 9] => true",
+ structEvaluator.eval(TestHelpers.Row.of(7, 8, null,
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(7)))))));
+ Assert.assertFalse("6 in [7, 8, 9] => false",
+ structEvaluator.eval(TestHelpers.Row.of(6, 8, null,
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(6)))))));
+
+ StructType charSeqStruct = StructType.of(required(34, "s", Types.StringType.get()));
+ Evaluator charSeqEvaluator = new Evaluator(charSeqStruct, in("s", "abc", "abd", "abc"));
+ Assert.assertTrue("utf8(abc) in [string(abc), string(abd)] => true",
+ charSeqEvaluator.eval(TestHelpers.Row.of(new Utf8("abc"))));
+ Assert.assertFalse("utf8(abcd) in [string(abc), string(abd)] => false",
+ charSeqEvaluator.eval(TestHelpers.Row.of(new Utf8("abcd"))));
+ }
+
+ @Test
+ public void testInExceptions() {
+ AssertHelpers.assertThrows(
+ "Throw exception if value is null",
+ NullPointerException.class,
+ "Cannot create expression literal from null",
+ () -> in("x", (Literal) null));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if value is null",
+ NullPointerException.class,
+ "Values cannot be null for IN predicate",
+ () -> in("x", (Collection<?>) null));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if calling literal() for IN predicate",
+ IllegalArgumentException.class,
+ "IN predicate cannot return a literal",
+ () -> in("x", 5, 6).literal());
+
+ AssertHelpers.assertThrows(
+ "Throw exception if any value in the input is null",
+ NullPointerException.class,
+ "Cannot create expression literal from null",
+ () -> in("x", 1, 2, null));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if binding fails for any element in the set",
+ ValidationException.class,
+ "Invalid value for conversion to type int",
+ () -> new Evaluator(STRUCT, in("x", 7, 8, 9.1)));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if no input value",
+ IllegalArgumentException.class,
+ "Cannot create IN predicate without a value",
+ () -> predicate(Expression.Operation.IN, "x"));
+
+ AssertHelpers.assertThrows(
+ "Implicit conversion IN to EQ and throw exception if binding fails",
+ ValidationException.class,
+ "Invalid value for conversion to type int",
+ () -> new Evaluator(STRUCT, predicate(Expression.Operation.IN, "x", 5.1)));
+ }
+
+ @Test
+ public void testNotIn() {
+ Assert.assertEquals(3, notIn("s", 7, 8, 9).literals().size());
+ Assert.assertEquals(3, notIn("s", 7, 8.1, Long.MAX_VALUE).literals().size());
+ Assert.assertEquals(3, notIn("s", "abc", "abd", "abc").literals().size());
+ Assert.assertEquals(0, notIn("s").literals().size());
+ Assert.assertEquals(1, notIn("s", 5).literals().size());
+ Assert.assertEquals(2, notIn("s", 5, 5).literals().size());
+ Assert.assertEquals(2, notIn("s", Arrays.asList(5, 5)).literals().size());
+ Assert.assertEquals(0, notIn("s", Collections.emptyList()).literals().size());
+
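+ // as above, Long.MAX_VALUE is dropped at binding, leaving [7, 8]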
+ Evaluator evaluator = new Evaluator(STRUCT, notIn("x", 7, 8, Long.MAX_VALUE));
+ Assert.assertFalse("7 not in [7, 8] => false", evaluator.eval(TestHelpers.Row.of(7, 8, null)));
+ Assert.assertTrue("6 not in [7, 8] => true", evaluator.eval(TestHelpers.Row.of(9, 8, null)));
+
+ Evaluator intSetEvaluator = new Evaluator(STRUCT,
+ notIn("x", Long.MAX_VALUE, Integer.MAX_VALUE, Long.MIN_VALUE));
+ Assert.assertFalse("Integer.MAX_VALUE not_in [Integer.MAX_VALUE] => false",
+ intSetEvaluator.eval(TestHelpers.Row.of(Integer.MAX_VALUE, 7.0, null)));
+ Assert.assertTrue("6 not_in [Integer.MAX_VALUE] => true",
+ intSetEvaluator.eval(TestHelpers.Row.of(6, 6.8, null)));
+
+ Evaluator integerEvaluator = new Evaluator(STRUCT, notIn("y", 7, 8, 9.1));
+ Assert.assertFalse("7.0 not in [7, 8, 9] => false",
+ integerEvaluator.eval(TestHelpers.Row.of(0, 7.0, null)));
+ Assert.assertFalse("9.1 not in [7, 8, 9.1] => false",
+ integerEvaluator.eval(TestHelpers.Row.of(7, 9.1, null)));
+ Assert.assertTrue("6.8 not in [7, 8, 9.1] => true",
+ integerEvaluator.eval(TestHelpers.Row.of(6, 6.8, null)));
+
+ Evaluator structEvaluator = new Evaluator(STRUCT, notIn("s1.s2.s3.s4.i", 7, 8, 9));
+ Assert.assertFalse("7 not in [7, 8, 9] => false",
+ structEvaluator.eval(TestHelpers.Row.of(7, 8, null,
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(7)))))));
+ Assert.assertTrue("6 not in [7, 8, 9] => true",
+ structEvaluator.eval(TestHelpers.Row.of(6, 8, null,
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(
+ TestHelpers.Row.of(6)))))));
+
+ StructType charSeqStruct = StructType.of(required(34, "s", Types.StringType.get()));
+ Evaluator charSeqEvaluator = new Evaluator(charSeqStruct, notIn("s", "abc", "abd", "abc"));
+ Assert.assertFalse("utf8(abc) not in [string(abc), string(abd)] => false",
+ charSeqEvaluator.eval(TestHelpers.Row.of(new Utf8("abc"))));
+ Assert.assertTrue("utf8(abcd) not in [string(abc), string(abd)] => true",
+ charSeqEvaluator.eval(TestHelpers.Row.of(new Utf8("abcd"))));
+ }
+
+ @Test
+ public void testNotInExceptions() {
+ AssertHelpers.assertThrows(
+ "Throw exception if value is null",
+ NullPointerException.class,
+ "Cannot create expression literal from null",
+ () -> notIn("x", (Literal) null));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if value is null",
+ NullPointerException.class,
+ "Values cannot be null for NOT_IN predicate",
+ () -> notIn("x", (Collection<?>) null));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if calling literal() for IN predicate",
+ IllegalArgumentException.class,
+ "NOT_IN predicate cannot return a literal",
+ () -> notIn("x", 5, 6).literal());
+
+ AssertHelpers.assertThrows(
+ "Throw exception if any value in the input is null",
+ NullPointerException.class,
+ "Cannot create expression literal from null",
+ () -> notIn("x", 1, 2, null));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if binding fails for any element in the set",
+ ValidationException.class,
+ "Invalid value for conversion to type int",
+ () -> new Evaluator(STRUCT, notIn("x", 7, 8, 9.1)));
+
+ AssertHelpers.assertThrows(
+ "Throw exception if no input value",
+ IllegalArgumentException.class,
+ "Cannot create NOT_IN predicate without a value",
+ () -> predicate(Expression.Operation.NOT_IN, "x"));
+
+ AssertHelpers.assertThrows(
+ "Implicit conversion NOT_IN to NOT_EQ and throw exception if binding fails",
+ ValidationException.class,
+ "Invalid value for conversion to type int",
+ () -> new Evaluator(STRUCT, predicate(Expression.Operation.NOT_IN, "x", 5.1)));
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
index 3e84699..fc4fc39 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
@@ -34,6 +34,7 @@ import static org.apache.iceberg.expressions.Expressions.greaterThan;
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Types.NestedField.required;
public class TestExpressionBinding {
@@ -132,6 +133,18 @@ public class TestExpressionBinding {
}
@Test
+ public void testStartsWith() {
+ StructType struct = StructType.of(required(0, "s", Types.StringType.get()));
+ Expression expr = startsWith("s", "abc");
+ Expression boundExpr = Binder.bind(struct, expr);
+ TestHelpers.assertAllReferencesBound("StartsWith", boundExpr);
+ // make sure the expression is a StartsWith
+ BoundPredicate<?> pred = TestHelpers.assertAndUnwrap(boundExpr, BoundPredicate.class);
+ Assert.assertEquals("Should be right operation", Expression.Operation.STARTS_WITH, pred.op());
+ Assert.assertEquals("Should bind s correctly", 0, pred.ref().fieldId());
+ }
+
+ @Test
public void testAlwaysTrue() {
Assert.assertEquals("Should not change alwaysTrue",
alwaysTrue(),
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionHelpers.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionHelpers.java
index 456a967..450c9c0 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionHelpers.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionHelpers.java
@@ -19,7 +19,7 @@
package org.apache.iceberg.expressions;
-import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.AssertHelpers;
import org.junit.Assert;
import org.junit.Test;
@@ -73,7 +73,7 @@ public class TestExpressionHelpers {
@Test
public void testNullName() {
- TestHelpers.assertThrows("Should catch null column names when creating expressions",
+ AssertHelpers.assertThrows("Should catch null column names when creating expressions",
NullPointerException.class, "Name cannot be null", () -> equal(null, 5));
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java
index f4eeffd..6997636 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java
@@ -19,6 +19,7 @@
package org.apache.iceberg.expressions;
+import java.util.Collection;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.expressions.Expression.Operation;
@@ -30,7 +31,8 @@ public class TestExpressionSerialization {
@Test
public void testExpressions() throws Exception {
Schema schema = new Schema(
- Types.NestedField.optional(34, "a", Types.IntegerType.get())
+ Types.NestedField.optional(34, "a", Types.IntegerType.get()),
+ Types.NestedField.required(35, "s", Types.StringType.get())
);
Expression[] expressions = new Expression[] {
@@ -41,13 +43,18 @@ public class TestExpressionSerialization {
Expressions.greaterThan("z", 0),
Expressions.greaterThanOrEqual("t", 129),
Expressions.equal("col", "data"),
+ Expressions.in("col", "a", "b"),
+ Expressions.notIn("col", 1, 2, 3),
Expressions.notEqual("col", "abc"),
Expressions.notNull("maybeNull"),
Expressions.isNull("maybeNull2"),
Expressions.not(Expressions.greaterThan("a", 10)),
Expressions.and(Expressions.greaterThanOrEqual("a", 0), Expressions.lessThan("a", 3)),
Expressions.or(Expressions.lessThan("a", 0), Expressions.greaterThan("a", 10)),
- Expressions.equal("a", 5).bind(schema.asStruct())
+ Expressions.equal("a", 5).bind(schema.asStruct()),
+ Expressions.in("a", 5, 6, 7).bind(schema.asStruct()),
+ Expressions.notIn("s", "abc", "xyz").bind(schema.asStruct()),
+ Expressions.isNull("a").bind(schema.asStruct()),
};
for (Expression expression : expressions) {
@@ -92,7 +99,7 @@ public class TestExpressionSerialization {
}
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked", "checkstyle:CyclomaticComplexity"})
private static boolean equals(Predicate left, Predicate right) {
if (left.op() != right.op()) {
return false;
@@ -106,8 +113,42 @@ public class TestExpressionSerialization {
return true;
}
- return left.literal().comparator()
- .compare(left.literal().value(), right.literal().value()) == 0;
+ if (left.getClass() != right.getClass()) {
+ return false;
+ }
+
+ if (left instanceof UnboundPredicate) {
+ UnboundPredicate lpred = (UnboundPredicate) left;
+ UnboundPredicate rpred = (UnboundPredicate) right;
+ if (left.op() == Operation.IN || left.op() == Operation.NOT_IN) {
+ return equals(lpred.literals(), rpred.literals());
+ }
+ return lpred.literal().comparator()
+ .compare(lpred.literal().value(), rpred.literal().value()) == 0;
+
+ } else if (left instanceof BoundPredicate) {
+ BoundPredicate lpred = (BoundPredicate) left;
+ BoundPredicate rpred = (BoundPredicate) right;
+ if (lpred.isLiteralPredicate() && rpred.isLiteralPredicate()) {
+ return lpred.asLiteralPredicate().literal().comparator()
+ .compare(lpred.asLiteralPredicate().literal().value(), rpred.asLiteralPredicate().literal().value()) == 0;
+ } else if (lpred.isSetPredicate() && rpred.isSetPredicate()) {
+ return equals(lpred.asSetPredicate().literalSet(), rpred.asSetPredicate().literalSet());
+ } else {
+ return lpred.isUnaryPredicate() && rpred.isUnaryPredicate();
+ }
+
+ } else {
+ throw new UnsupportedOperationException(String.format(
+ "Predicate equality check for %s is not supported", left.getClass()));
+ }
+ }
+
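+ // order-insensitive comparison of literal sets; relies on Literal equality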
+ private static boolean equals(Collection<Literal<?>> left, Collection<Literal<?>> right) {
+ if (left.size() != right.size()) {
+ return false;
+ }
+ return left.containsAll(right);
}
private static boolean equals(Reference left, Reference right) {
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java
index 3859f00..c2217f9 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java
@@ -21,6 +21,7 @@ package org.apache.iceberg.expressions;
import com.google.common.collect.ImmutableList;
import java.nio.ByteBuffer;
+import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
@@ -41,6 +42,7 @@ import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.notEqual;
import static org.apache.iceberg.expressions.Expressions.notNull;
import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Conversions.toByteBuffer;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
@@ -87,6 +89,9 @@ public class TestInclusiveManifestEvaluator {
shouldRead = ManifestEvaluator.forRowFilter(notNull("no_nulls"), SPEC, true).eval(FILE);
Assert.assertTrue("Should read: non-null column contains a non-null value", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("all_nulls", "asad"), SPEC, true).eval(FILE);
+ Assert.assertFalse("Should skip: startsWith on all null column", shouldRead);
}
@Test
@@ -103,7 +108,7 @@ public class TestInclusiveManifestEvaluator {
@Test
public void testMissingColumn() {
- TestHelpers.assertThrows("Should complain about missing column in expression",
+ AssertHelpers.assertThrows("Should complain about missing column in expression",
ValidationException.class, "Cannot find field 'missing'",
() -> ManifestEvaluator.forRowFilter(lessThan("missing", 5), SPEC, true).eval(FILE));
}
@@ -113,7 +118,7 @@ public class TestInclusiveManifestEvaluator {
Expression[] exprs = new Expression[] {
lessThan("id", 5), lessThanOrEqual("id", 30), equal("id", 70),
greaterThan("id", 78), greaterThanOrEqual("id", 90), notEqual("id", 101),
- isNull("id"), notNull("id")
+ isNull("id"), notNull("id"), startsWith("all_nulls", "a")
};
for (Expression expr : exprs) {
@@ -314,8 +319,32 @@ public class TestInclusiveManifestEvaluator {
@Test
public void testCaseSensitiveIntegerNotEqRewritten() {
- TestHelpers.assertThrows("Should complain about missing column in expression",
+ AssertHelpers.assertThrows("Should complain about missing column in expression",
ValidationException.class, "Cannot find field 'ID'",
() -> ManifestEvaluator.forRowFilter(not(equal("ID", 5)), SPEC, true).eval(FILE));
}
+
+ @Test
+ public void testStringStartsWith() {
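+ // startsWith prunes a manifest by comparing the prefix against the partition field's lower and upper bounds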
+ boolean shouldRead = ManifestEvaluator.forRowFilter(startsWith("some_nulls", "a"), SPEC, false).eval(FILE);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("some_nulls", "aa"), SPEC, false).eval(FILE);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("some_nulls", "dddd"), SPEC, false).eval(FILE);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("some_nulls", "z"), SPEC, false).eval(FILE);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("no_nulls", "a"), SPEC, false).eval(FILE);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("some_nulls", "zzzz"), SPEC, false).eval(FILE);
+ Assert.assertFalse("Should skip: range doesn't match", shouldRead);
+
+ shouldRead = ManifestEvaluator.forRowFilter(startsWith("some_nulls", "1"), SPEC, false).eval(FILE);
+ Assert.assertFalse("Should skip: range doesn't match", shouldRead);
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
index 6014ce6..fdfd139 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
@@ -20,14 +20,16 @@
package org.apache.iceberg.expressions;
import com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Schema;
-import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.TestHelpers.Row;
import org.apache.iceberg.TestHelpers.TestDataFile;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.IntegerType;
+import org.apache.iceberg.types.Types.StringType;
+import org.apache.iceberg.util.UnicodeUtil;
import org.junit.Assert;
import org.junit.Test;
@@ -42,6 +44,7 @@ import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.notEqual;
import static org.apache.iceberg.expressions.Expressions.notNull;
import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Conversions.toByteBuffer;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
@@ -74,11 +77,59 @@ public class TestInclusiveMetricsEvaluator {
ImmutableMap.of(
1, toByteBuffer(IntegerType.get(), 79)));
+ private static final DataFile FILE_2 = new TestDataFile("file_2.avro", Row.of(), 50,
+ // any value counts, including nulls
+ ImmutableMap.of(3, 20L),
+ // null value counts
+ ImmutableMap.of(3, 2L),
+ // lower bounds
+ ImmutableMap.of(3, toByteBuffer(StringType.get(), "aa")),
+ // upper bounds
+ ImmutableMap.of(3, toByteBuffer(StringType.get(), "dC")));
+
+ private static final DataFile FILE_3 = new TestDataFile("file_3.avro", Row.of(), 50,
+ // any value counts, including nulls
+ ImmutableMap.of(3, 20L),
+ // null value counts
+ ImmutableMap.of(3, 2L),
+ // lower bounds
+ ImmutableMap.of(3, toByteBuffer(StringType.get(), "1str1")),
+ // upper bounds
+ ImmutableMap.of(3, toByteBuffer(StringType.get(), "3str3")));
+
+ private static final DataFile FILE_4 = new TestDataFile("file_4.avro", Row.of(), 50,
+ // any value counts, including nulls
+ ImmutableMap.of(3, 20L),
+ // null value counts
+ ImmutableMap.of(3, 2L),
+ // lower bounds
+ ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+ // upper bounds
+ ImmutableMap.of(3, toByteBuffer(StringType.get(), "イロハニホヘト")));
+
@Test
public void testAllNulls() {
boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notNull("all_nulls")).eval(FILE);
Assert.assertFalse("Should skip: no non-null value in all null column", shouldRead);
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, lessThan("all_nulls", "a")).eval(FILE);
+ Assert.assertFalse("Should skip: lessThan on all null column", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, lessThanOrEqual("all_nulls", "a")).eval(FILE);
+ Assert.assertFalse("Should skip: lessThanOrEqual on all null column", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThan("all_nulls", "a")).eval(FILE);
+ Assert.assertFalse("Should skip: greaterThan on all null column", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThanOrEqual("all_nulls", "a")).eval(FILE);
+ Assert.assertFalse("Should skip: greaterThanOrEqual on all null column", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, equal("all_nulls", "a")).eval(FILE);
+ Assert.assertFalse("Should skip: equal on all null column", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("all_nulls", "a")).eval(FILE);
+ Assert.assertFalse("Should skip: startsWith on all null column", shouldRead);
+
shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notNull("some_nulls")).eval(FILE);
Assert.assertTrue("Should read: column with some nulls contains a non-null value", shouldRead);
@@ -109,7 +160,7 @@ public class TestInclusiveMetricsEvaluator {
@Test
public void testMissingColumn() {
- TestHelpers.assertThrows("Should complain about missing column in expression",
+ AssertHelpers.assertThrows("Should complain about missing column in expression",
ValidationException.class, "Cannot find field 'missing'",
() -> new InclusiveMetricsEvaluator(SCHEMA, lessThan("missing", 5)).eval(FILE));
}
@@ -340,4 +391,44 @@ public class TestInclusiveMetricsEvaluator {
public void testCaseSensitiveIntegerNotEqRewritten() {
boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, not(equal("ID", 5)), true).eval(FILE);
}
+
+ @Test
+ public void testStringStartsWith() {
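+ // FILE carries no string bounds, so the evaluator must assume rows may match; FILE_2..FILE_4 exercise the bound checks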
+ boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "a"), true).eval(FILE);
+ Assert.assertTrue("Should read: no stats", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "a"), true).eval(FILE_2);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "aa"), true).eval(FILE_2);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "aaa"), true).eval(FILE_2);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "1s"), true).eval(FILE_3);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "1str1x"), true).eval(FILE_3);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "ff"), true).eval(FILE_4);
+ Assert.assertTrue("Should read: range matches", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "aB"), true).eval(FILE_2);
+ Assert.assertFalse("Should not read: range doesn't match", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "dWX"), true).eval(FILE_2);
+ Assert.assertFalse("Should not read: range doesn't match", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "5"), true).eval(FILE_3);
+ Assert.assertFalse("Should not read: range doesn't match", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", "3str3x"), true).eval(FILE_3);
+ Assert.assertFalse("Should not read: range doesn't match", shouldRead);
+
+ String aboveMax = UnicodeUtil.truncateStringMax(Literal.of("イロハニホヘト"), 4).value().toString();
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, startsWith("required", aboveMax), true).eval(FILE_4);
+ Assert.assertFalse("Should not read: range doesn't match", shouldRead);
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestPredicateBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestPredicateBinding.java
index bc5013c..37b90c1 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestPredicateBinding.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestPredicateBinding.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.expressions;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.List;
+import java.util.stream.Collectors;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.StructType;
@@ -29,13 +30,16 @@ import org.junit.Assert;
import org.junit.Test;
import static org.apache.iceberg.TestHelpers.assertAndUnwrap;
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapBoundSet;
import static org.apache.iceberg.expressions.Expression.Operation.EQ;
import static org.apache.iceberg.expressions.Expression.Operation.GT;
import static org.apache.iceberg.expressions.Expression.Operation.GT_EQ;
+import static org.apache.iceberg.expressions.Expression.Operation.IN;
import static org.apache.iceberg.expressions.Expression.Operation.IS_NULL;
import static org.apache.iceberg.expressions.Expression.Operation.LT;
import static org.apache.iceberg.expressions.Expression.Operation.LT_EQ;
import static org.apache.iceberg.expressions.Expression.Operation.NOT_EQ;
+import static org.apache.iceberg.expressions.Expression.Operation.NOT_IN;
import static org.apache.iceberg.expressions.Expression.Operation.NOT_NULL;
import static org.apache.iceberg.expressions.Expressions.ref;
import static org.apache.iceberg.types.Types.NestedField.optional;
@@ -61,8 +65,9 @@ public class TestPredicateBinding {
Assert.assertEquals("Should reference correct field ID", 11, bound.ref().fieldId());
Assert.assertEquals("Should not change the comparison operation", LT, bound.op());
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
Assert.assertEquals("Should not alter literal value",
- Integer.valueOf(6), bound.literal().value());
+ Integer.valueOf(6), bound.asLiteralPredicate().literal().value());
}
@Test
@@ -92,8 +97,9 @@ public class TestPredicateBinding {
Expression expr = unbound.bind(struct);
BoundPredicate<Integer> bound = assertAndUnwrap(expr);
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
Assert.assertEquals("Should not alter literal value",
- Integer.valueOf(5), bound.literal().value());
+ Integer.valueOf(5), bound.asLiteralPredicate().literal().value());
Assert.assertEquals("Should reference correct field ID", 14, bound.ref().fieldId());
Assert.assertEquals("Should not change the comparison operation", op, bound.op());
}
@@ -109,8 +115,9 @@ public class TestPredicateBinding {
Expression expr = unbound.bind(struct);
BoundPredicate<BigDecimal> bound = assertAndUnwrap(expr);
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
Assert.assertEquals("Should convert literal value to decimal",
- new BigDecimal("12.40"), bound.literal().value());
+ new BigDecimal("12.40"), bound.asLiteralPredicate().literal().value());
Assert.assertEquals("Should reference correct field ID", 15, bound.ref().fieldId());
Assert.assertEquals("Should not change the comparison operation", op, bound.op());
}
@@ -129,7 +136,7 @@ public class TestPredicateBinding {
} catch (ValidationException e) {
Assert.assertEquals("Should ",
e.getMessage(),
- "Invalid value for comparison inclusive type float: 12.40 (java.lang.String)");
+ "Invalid value for conversion to type float: 12.40 (java.lang.String)");
}
}
}
@@ -181,25 +188,29 @@ public class TestPredicateBinding {
Expression ltExpr = new UnboundPredicate<>(LT, ref("i"), (long) Integer.MAX_VALUE).bind(struct, true);
BoundPredicate<Integer> ltMax = assertAndUnwrap(ltExpr);
+ Assert.assertTrue("Should be a literal predicate", ltMax.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Integer",
- (Integer) Integer.MAX_VALUE, ltMax.literal().value());
+ (Integer) Integer.MAX_VALUE, ltMax.asLiteralPredicate().literal().value());
Expression lteqExpr = new UnboundPredicate<>(LT_EQ, ref("i"), (long) Integer.MAX_VALUE)
.bind(struct);
BoundPredicate<Integer> lteqMax = assertAndUnwrap(lteqExpr);
+ Assert.assertTrue("Should be a literal predicate", lteqMax.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Integer",
- (Integer) Integer.MAX_VALUE, lteqMax.literal().value());
+ (Integer) Integer.MAX_VALUE, lteqMax.asLiteralPredicate().literal().value());
Expression gtExpr = new UnboundPredicate<>(GT, ref("i"), (long) Integer.MIN_VALUE).bind(struct);
BoundPredicate<Integer> gtMin = assertAndUnwrap(gtExpr);
+ Assert.assertTrue("Should be a literal predicate", gtMin.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Integer",
- (Integer) Integer.MIN_VALUE, gtMin.literal().value());
+ (Integer) Integer.MIN_VALUE, gtMin.asLiteralPredicate().literal().value());
Expression gteqExpr = new UnboundPredicate<>(GT_EQ, ref("i"), (long) Integer.MIN_VALUE)
.bind(struct);
BoundPredicate<Integer> gteqMin = assertAndUnwrap(gteqExpr);
+ Assert.assertTrue("Should be a literal predicate", gteqMin.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Integer",
- (Integer) Integer.MIN_VALUE, gteqMin.literal().value());
+ (Integer) Integer.MIN_VALUE, gteqMin.asLiteralPredicate().literal().value());
}
@Test
@@ -249,25 +260,29 @@ public class TestPredicateBinding {
Expression ltExpr = new UnboundPredicate<>(LT, ref("f"), (double) Float.MAX_VALUE).bind(struct);
BoundPredicate<Float> ltMax = assertAndUnwrap(ltExpr);
+ Assert.assertTrue("Should be a literal predicate", ltMax.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Float",
- (Float) Float.MAX_VALUE, ltMax.literal().value());
+ (Float) Float.MAX_VALUE, ltMax.asLiteralPredicate().literal().value());
Expression lteqExpr = new UnboundPredicate<>(LT_EQ, ref("f"), (double) Float.MAX_VALUE)
.bind(struct);
BoundPredicate<Float> lteqMax = assertAndUnwrap(lteqExpr);
+ Assert.assertTrue("Should be a literal predicate", lteqMax.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Float",
- (Float) Float.MAX_VALUE, lteqMax.literal().value());
+ (Float) Float.MAX_VALUE, lteqMax.asLiteralPredicate().literal().value());
Expression gtExpr = new UnboundPredicate<>(GT, ref("f"), (double) -Float.MAX_VALUE).bind(struct);
BoundPredicate<Float> gtMin = assertAndUnwrap(gtExpr);
+ Assert.assertTrue("Should be a literal predicate", gtMin.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Float",
- Float.valueOf(-Float.MAX_VALUE), gtMin.literal().value());
+ Float.valueOf(-Float.MAX_VALUE), gtMin.asLiteralPredicate().literal().value());
Expression gteqExpr = new UnboundPredicate<>(GT_EQ, ref("f"), (double) -Float.MAX_VALUE)
.bind(struct);
BoundPredicate<Float> gteqMin = assertAndUnwrap(gteqExpr);
+ Assert.assertTrue("Should be a literal predicate", gteqMin.isLiteralPredicate());
Assert.assertEquals("Should translate bound to Float",
- Float.valueOf(-Float.MAX_VALUE), gteqMin.literal().value());
+ Float.valueOf(-Float.MAX_VALUE), gteqMin.asLiteralPredicate().literal().value());
}
@Test
@@ -280,7 +295,7 @@ public class TestPredicateBinding {
BoundPredicate<?> bound = assertAndUnwrap(expr);
Assert.assertEquals("Should use the same operation", IS_NULL, bound.op());
Assert.assertEquals("Should use the correct field", 19, bound.ref().fieldId());
- Assert.assertNull("Should not have a literal value", bound.literal());
+ Assert.assertTrue("Should be a unary predicate", bound.isUnaryPredicate());
StructType required = StructType.of(required(20, "s", Types.StringType.get()));
Assert.assertEquals("IsNull inclusive a required field should be alwaysFalse",
@@ -296,10 +311,214 @@ public class TestPredicateBinding {
BoundPredicate<?> bound = assertAndUnwrap(expr);
Assert.assertEquals("Should use the same operation", NOT_NULL, bound.op());
Assert.assertEquals("Should use the correct field", 21, bound.ref().fieldId());
- Assert.assertNull("Should not have a literal value", bound.literal());
+ Assert.assertTrue("Should be a unary predicate", bound.isUnaryPredicate());
StructType required = StructType.of(required(22, "s", Types.StringType.get()));
Assert.assertEquals("NotNull inclusive a required field should be alwaysTrue",
Expressions.alwaysTrue(), unbound.bind(required));
}
+
+ @Test
+ public void testInPredicateBinding() {
+ StructType struct = StructType.of(
+ required(10, "x", Types.IntegerType.get()),
+ required(11, "y", Types.IntegerType.get()),
+ required(12, "z", Types.IntegerType.get())
+ );
+
+ UnboundPredicate<Integer> unbound = Expressions.in("y", 6, 7, 11);
+
+ Expression expr = unbound.bind(struct);
+ BoundSetPredicate<Integer> bound = assertAndUnwrapBoundSet(expr);
+
+ Assert.assertEquals("Should reference correct field ID", 11, bound.ref().fieldId());
+ Assert.assertEquals("Should not change the IN operation", IN, bound.op());
+ Assert.assertArrayEquals("Should not alter literal set values",
+ new Integer[]{6, 7, 11},
+ bound.literalSet().stream().sorted()
+ .collect(Collectors.toList()).toArray(new Integer[0]));
+ }
+
+ @Test
+ public void testInPredicateBindingConversion() {
+ StructType struct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));
+ UnboundPredicate<String> unbound = Expressions.in("d", "12.40", "1.23", "99.99", "1.23");
+ Expression expr = unbound.bind(struct);
+ BoundSetPredicate<BigDecimal> bound = assertAndUnwrapBoundSet(expr);
+ Assert.assertArrayEquals("Should convert literal set values to decimal",
+ new BigDecimal[]{new BigDecimal("1.23"), new BigDecimal("12.40"), new BigDecimal("99.99")},
+ bound.literalSet().stream().sorted()
+ .collect(Collectors.toList()).toArray(new BigDecimal[0]));
+ Assert.assertEquals("Should reference correct field ID", 15, bound.ref().fieldId());
+ Assert.assertEquals("Should not change the IN operation", IN, bound.op());
+ }
+
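+ // a single-literal IN is rewritten to EQ when bound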
+ @Test
+ public void testInToEqPredicate() {
+ StructType struct = StructType.of(required(14, "x", Types.IntegerType.get()));
+
+ UnboundPredicate<Integer> unbound = Expressions.in("x", 5);
+
+ Assert.assertEquals("Should create an IN predicate with a single item", IN, unbound.op());
+ Assert.assertEquals("Should create an IN predicate with a single item",
+ 1, unbound.literals().size());
+
+ Expression expr = unbound.bind(struct);
+ BoundPredicate<Integer> bound = assertAndUnwrap(expr);
+
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
+ Assert.assertEquals("Should not alter literal value",
+ Integer.valueOf(5), bound.asLiteralPredicate().literal().value());
+ Assert.assertEquals("Should reference correct field ID", 14, bound.ref().fieldId());
+ Assert.assertEquals("Should change the operation from IN to EQ", EQ, bound.op());
+ }
+
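+ // Long.MAX_VALUE is above the int range and is dropped at binding; the remaining literal turns IN into EQ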
+ @Test
+ public void testInPredicateBindingConversionToEq() {
+ StructType struct = StructType.of(required(14, "x", Types.IntegerType.get()));
+
+ UnboundPredicate<Long> unbound = Expressions.in("x", 5L, Long.MAX_VALUE);
+
+ Expression.Operation op = unbound.op();
+ Assert.assertEquals("Should create an IN unbound predicate", IN, op);
+
+ Expression expr = unbound.bind(struct);
+ BoundPredicate<Integer> bound = assertAndUnwrap(expr);
+
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
+ Assert.assertEquals("Should remove aboveMax literal value",
+ Integer.valueOf(5), bound.asLiteralPredicate().literal().value());
+ Assert.assertEquals("Should reference correct field ID", 14, bound.ref().fieldId());
+ Assert.assertEquals("Should change the IN operation to EQ", EQ, bound.op());
+ }
+
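+ // all three doubles convert to 12.40 at decimal(9, 2), so the set dedupes to one literal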
+ @Test
+ public void testInPredicateBindingConversionDedupToEq() {
+ StructType struct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));
+ UnboundPredicate<Double> unbound = Expressions.in("d", 12.40, 12.401, 12.402);
+ Assert.assertEquals("Should create an IN unbound predicate", IN, unbound.op());
+
+ Expression expr = unbound.bind(struct);
+ BoundPredicate<BigDecimal> bound = assertAndUnwrap(expr);
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
+ Assert.assertEquals("Should convert literal set values to a single decimal",
+ new BigDecimal("12.40"), bound.asLiteralPredicate().literal().value());
+ Assert.assertEquals("Should reference correct field ID", 15, bound.ref().fieldId());
+ Assert.assertEquals("Should change the IN operation to EQ", EQ, bound.op());
+ }
+
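+ // every literal is above the int range, so the bound IN simplifies to alwaysFalse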
+ @Test
+ public void testInPredicateBindingConversionToExpression() {
+ StructType struct = StructType.of(required(14, "x", Types.IntegerType.get()));
+
+ UnboundPredicate<Long> unbound = Expressions.in("x", Long.MAX_VALUE - 1, Long.MAX_VALUE);
+
+ Expression.Operation op = unbound.op();
+ Assert.assertEquals("Should create an IN predicate", IN, op);
+
+ Expression expr = unbound.bind(struct);
+ Assert.assertEquals("Should change IN to alwaysFalse expression", Expressions.alwaysFalse(), expr);
+ }
+
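+ // the NOT_IN cases below mirror the IN cases above: the same narrowing and dedup apply, rewriting to NOT_EQ or alwaysTrue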
+ @Test
+ public void testNotInPredicateBinding() {
+ StructType struct = StructType.of(
+ required(10, "x", Types.IntegerType.get()),
+ required(11, "y", Types.IntegerType.get()),
+ required(12, "z", Types.IntegerType.get())
+ );
+
+ UnboundPredicate<Integer> unbound = Expressions.notIn("y", 6, 7, 11);
+
+ Expression expr = unbound.bind(struct);
+ BoundSetPredicate<Integer> bound = assertAndUnwrapBoundSet(expr);
+
+ Assert.assertEquals("Should reference correct field ID", 11, bound.ref().fieldId());
+ Assert.assertEquals("Should not change the NOT_IN operation", NOT_IN, bound.op());
+ Assert.assertArrayEquals("Should not alter literal set values",
+ new Integer[]{6, 7, 11},
+ bound.literalSet().stream().sorted()
+ .collect(Collectors.toList()).toArray(new Integer[0]));
+ }
+
+ @Test
+ public void testNotInPredicateBindingConversion() {
+ StructType struct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));
+ UnboundPredicate<String> unbound = Expressions.notIn("d", "12.40", "1.23", "99.99", "1.23");
+ Expression expr = unbound.bind(struct);
+ BoundSetPredicate<BigDecimal> bound = assertAndUnwrapBoundSet(expr);
+ Assert.assertArrayEquals("Should convert literal set values to decimal",
+ new BigDecimal[]{new BigDecimal("1.23"), new BigDecimal("12.40"), new BigDecimal("99.99")},
+ bound.literalSet().stream().sorted()
+ .collect(Collectors.toList()).toArray(new BigDecimal[0]));
+ Assert.assertEquals("Should reference correct field ID", 15, bound.ref().fieldId());
+ Assert.assertEquals("Should not change the NOT_IN operation", NOT_IN, bound.op());
+ }
+
+ @Test
+ public void testNotInToNotEqPredicate() {
+ StructType struct = StructType.of(required(14, "x", Types.IntegerType.get()));
+
+ UnboundPredicate<Integer> unbound = Expressions.notIn("x", 5);
+
+ Assert.assertEquals("Should create a NOT_IN predicate with a single item", NOT_IN, unbound.op());
+ Assert.assertEquals("Should create a NOT_IN predicate with a single item",
+ 1, unbound.literals().size());
+
+ Expression expr = unbound.bind(struct);
+ BoundPredicate<Integer> bound = assertAndUnwrap(expr);
+
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
+ Assert.assertEquals("Should not alter literal value",
+ Integer.valueOf(5), bound.asLiteralPredicate().literal().value());
+ Assert.assertEquals("Should reference correct field ID", 14, bound.ref().fieldId());
+ Assert.assertEquals("Should change the operation from NOT_IN to NOT_EQ", NOT_EQ, bound.op());
+ }
+
+ @Test
+ public void testNotInPredicateBindingConversionToNotEq() {
+ StructType struct = StructType.of(required(14, "x", Types.IntegerType.get()));
+
+ UnboundPredicate<Long> unbound = Expressions.notIn("x", 5L, Long.MAX_VALUE);
+
+ Expression.Operation op = unbound.op();
+ Assert.assertEquals("Should create a NOT_IN unbound predicate", NOT_IN, op);
+
+ Expression expr = unbound.bind(struct);
+ BoundPredicate<Integer> bound = assertAndUnwrap(expr);
+
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
+ Assert.assertEquals("Should remove aboveMax literal value",
+ Integer.valueOf(5), bound.asLiteralPredicate().literal().value());
+ Assert.assertEquals("Should reference correct field ID", 14, bound.ref().fieldId());
+ Assert.assertEquals("Should change the NOT_IN operation to NOT_EQ", NOT_EQ, bound.op());
+ }
+
+ @Test
+ public void testNotInPredicateBindingConversionDedupToNotEq() {
+ StructType struct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));
+ UnboundPredicate<Double> unbound = Expressions.notIn("d", 12.40, 12.401, 12.402);
+ Assert.assertEquals("Should create a NOT_IN unbound predicate", NOT_IN, unbound.op());
+
+ Expression expr = unbound.bind(struct);
+ BoundPredicate<BigDecimal> bound = assertAndUnwrap(expr);
+ Assert.assertTrue("Should be a literal predicate", bound.isLiteralPredicate());
+ Assert.assertEquals("Should convert literal set values to a single decimal",
+ new BigDecimal("12.40"), bound.asLiteralPredicate().literal().value());
+ Assert.assertEquals("Should reference correct field ID", 15, bound.ref().fieldId());
+ Assert.assertEquals("Should change the NOT_IN operation to NOT_EQ", NOT_EQ, bound.op());
+ }
+
+ @Test
+ public void testNotInPredicateBindingConversionToExpression() {
+ StructType struct = StructType.of(required(14, "x", Types.IntegerType.get()));
+
+ UnboundPredicate<Long> unbound = Expressions.notIn("x", Long.MAX_VALUE - 1, Long.MAX_VALUE);
+
+ Expression.Operation op = unbound.op();
+ Assert.assertEquals("Should create an NOT_IN predicate", NOT_IN, op);
+
+ Expression expr = unbound.bind(struct);
+ Assert.assertEquals("Should change NOT_IN to alwaysTrue expression", Expressions.alwaysTrue(), expr);
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java
index 2269c07..416046c 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java
@@ -20,9 +20,9 @@
package org.apache.iceberg.expressions;
import com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Schema;
-import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.TestHelpers.Row;
import org.apache.iceberg.TestHelpers.TestDataFile;
import org.apache.iceberg.exceptions.ValidationException;
@@ -77,6 +77,38 @@ public class TestStrictMetricsEvaluator {
1, toByteBuffer(IntegerType.get(), 79),
7, toByteBuffer(IntegerType.get(), 5)));
+ private static final DataFile FILE_2 = new TestDataFile("file_2.avro", Row.of(), 50,
+ // any value counts, including nulls
+ ImmutableMap.of(
+ 4, 50L,
+ 5, 50L,
+ 6, 50L),
+ // null value counts
+ ImmutableMap.of(
+ 4, 50L,
+ 5, 10L,
+ 6, 0L),
+ // lower bounds
+ ImmutableMap.of(5, toByteBuffer(StringType.get(), "bbb")),
+ // upper bounds
+ ImmutableMap.of(5, toByteBuffer(StringType.get(), "eee")));
+
+ private static final DataFile FILE_3 = new TestDataFile("file_3.avro", Row.of(), 50,
+ // any value counts, including nulls
+ ImmutableMap.of(
+ 4, 50L,
+ 5, 50L,
+ 6, 50L),
+ // null value counts
+ ImmutableMap.of(
+ 4, 50L,
+ 5, 10L,
+ 6, 0L),
+ // lower bounds
+ ImmutableMap.of(5, toByteBuffer(StringType.get(), "bbb")),
+ // upper bounds
+ ImmutableMap.of(5, toByteBuffer(StringType.get(), "bbb")));
+
@Test
public void testAllNulls() {
boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("all_nulls")).eval(FILE);
@@ -87,6 +119,9 @@ public class TestStrictMetricsEvaluator {
shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("no_nulls")).eval(FILE);
Assert.assertTrue("Should match: non-null column contains no null values", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("all_nulls", "a")).eval(FILE);
+ Assert.assertTrue("Should match: notEqual on all nulls column", shouldRead);
}
@Test
@@ -102,6 +137,24 @@ public class TestStrictMetricsEvaluator {
}
@Test
+ public void testSomeNulls() {
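+ // strict evaluation requires every row to match, and rows with nulls never do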
+ boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, lessThan("some_nulls", "ggg")).eval(FILE_2);
+ Assert.assertFalse("Should not match: lessThan on some nulls column", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, lessThanOrEqual("some_nulls", "eee")).eval(FILE_2);
+ Assert.assertFalse("Should not match: lessThanOrEqual on some nulls column", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, greaterThan("some_nulls", "aaa")).eval(FILE_2);
+ Assert.assertFalse("Should not match: greaterThan on some nulls column", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, greaterThanOrEqual("some_nulls", "bbb")).eval(FILE_2);
+ Assert.assertFalse("Should not match: greaterThanOrEqual on some nulls column", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, equal("some_nulls", "bbb")).eval(FILE_3);
+ Assert.assertFalse("Should not match: equal on some nulls column", shouldRead);
+ }
+
+ @Test
public void testRequiredColumn() {
boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("required")).eval(FILE);
Assert.assertTrue("Should match: required columns are always non-null", shouldRead);
@@ -112,7 +165,7 @@ public class TestStrictMetricsEvaluator {
@Test
public void testMissingColumn() {
- TestHelpers.assertThrows("Should complain about missing column in expression",
+ AssertHelpers.assertThrows("Should complain about missing column in expression",
ValidationException.class, "Cannot find field 'missing'",
() -> new StrictMetricsEvaluator(SCHEMA, lessThan("missing", 5)).eval(FILE));
}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java
index f8229cc..c344a97 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java
@@ -20,6 +20,7 @@
package org.apache.iceberg.transforms;
import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
@@ -53,4 +54,21 @@ public class TestDates {
Assert.assertEquals("Should produce \"null\" for null",
"null", Transforms.day(type).toHumanString(null));
}
+
+ @Test
+ public void testDatesReturnType() {
+ Types.DateType type = Types.DateType.get();
+
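+ // year and month transforms produce int ordinals; day preserves the date type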
+ Transform<Integer, Integer> year = Transforms.year(type);
+ Type yearResultType = year.getResultType(type);
+ Assert.assertEquals(Types.IntegerType.get(), yearResultType);
+
+ Transform<Integer, Integer> month = Transforms.month(type);
+ Type monthResultType = month.getResultType(type);
+ Assert.assertEquals(Types.IntegerType.get(), monthResultType);
+
+ Transform<Integer, Integer> day = Transforms.day(type);
+ Type dayResultType = day.getResultType(type);
+ Assert.assertEquals(Types.DateType.get(), dayResultType);
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java
index 7b4ee78..6f263cc 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java
@@ -43,7 +43,7 @@ public class TestDatesProjection {
private static final Schema SCHEMA = new Schema(optional(1, "date", TYPE));
public void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp, String expectedLiteral) {
+ Expression.Operation expectedOp, String expectedLiteral) {
Expression projection = Projections.strict(spec).project(filter);
UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
@@ -57,21 +57,21 @@ public class TestDatesProjection {
}
public void assertProjectionStrictValue(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp) {
+ Expression.Operation expectedOp) {
Expression projection = Projections.strict(spec).project(filter);
Assert.assertEquals(projection.op(), expectedOp);
}
public void assertProjectionInclusiveValue(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp) {
+ Expression.Operation expectedOp) {
Expression projection = Projections.inclusive(spec).project(filter);
Assert.assertEquals(projection.op(), expectedOp);
}
public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp, String expectedLiteral) {
+ Expression.Operation expectedOp, String expectedLiteral) {
Expression projection = Projections.inclusive(spec).project(filter);
UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
@@ -88,10 +88,10 @@ public class TestDatesProjection {
Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("date").build();
- assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016-12");
- assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2016-12");
- assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017-02");
- assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-01");
+ assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT, "2017-01");
+ assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT, "2017-01");
+ assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT, "2017-01");
+ assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT, "2016-12");
assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017-01");
assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE);
}
@@ -101,10 +101,10 @@ public class TestDatesProjection {
Integer date = (Integer) Literal.of("2017-12-31").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("date").build();
- assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2017-11");
- assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017-12");
- assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018-01");
- assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2018-01");
+ assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT, "2017-12");
+ assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT, "2018-01");
+ assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT, "2017-12");
+ assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT, "2017-12");
assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017-12");
assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE);
}
@@ -140,12 +140,12 @@ public class TestDatesProjection {
Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("date").build();
- assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016-12-31");
+ assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT, "2017-01-01");
// should be the same date for <=
- assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017-01-01");
- assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017-01-02");
+ assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT, "2017-01-02");
+ assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT, "2017-01-01");
// should be the same date for >=
- assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-01-01");
+ assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT, "2016-12-31");
assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017-01-01");
assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE);
}
@@ -168,10 +168,10 @@ public class TestDatesProjection {
Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("date").build();
- assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016");
- assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2016");
- assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018");
- assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017");
+ assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT, "2017");
+ assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT, "2017");
+ assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT, "2017");
+ assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT, "2016");
assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017");
assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE);
}
@@ -181,10 +181,10 @@ public class TestDatesProjection {
Integer date = (Integer) Literal.of("2017-12-31").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("date").build();
- assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016");
- assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017");
- assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018");
- assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2018");
+ assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT, "2017");
+ assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT, "2018");
+ assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT, "2017");
+ assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT, "2017");
assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017");
assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE);
}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java
index e77bb2c..e79cab2 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java
@@ -34,9 +34,9 @@ import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
+import static org.apache.iceberg.AssertHelpers.assertThrows;
import static org.apache.iceberg.TestHelpers.assertAndUnwrap;
import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
-import static org.apache.iceberg.TestHelpers.assertThrows;
import static org.apache.iceberg.expressions.Expressions.and;
import static org.apache.iceberg.expressions.Expressions.equal;
import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
@@ -79,9 +79,9 @@ public class TestProjection {
"id", projected.ref().name());
Assert.assertEquals("Operation should match", bound.op(), projected.op());
- if (bound.literal() != null) {
+ if (bound.isLiteralPredicate()) {
Assert.assertEquals("Literal should be equal",
- bound.literal().value(), projected.literal().value());
+ bound.asLiteralPredicate().literal().value(), projected.literal().value());
} else {
Assert.assertNull("Literal should be null", projected.literal());
}
@@ -117,9 +117,9 @@ public class TestProjection {
"id", projected.ref().name());
Assert.assertEquals("Operation should match", bound.op(), projected.op());
- if (bound.literal() != null) {
+ if (bound.isLiteralPredicate()) {
Assert.assertEquals("Literal should be equal",
- bound.literal().value(), projected.literal().value());
+ bound.asLiteralPredicate().literal().value(), projected.literal().value());
} else {
Assert.assertNull("Literal should be null", projected.literal());
}
@@ -168,9 +168,9 @@ public class TestProjection {
"id", projected.ref().name());
Assert.assertEquals("Operation should match", bound.op(), projected.op());
- if (bound.literal() != null) {
+ if (bound.isLiteralPredicate()) {
Assert.assertEquals("Literal should be equal",
- bound.literal().value(), projected.literal().value());
+ bound.asLiteralPredicate().literal().value(), projected.literal().value());
} else {
Assert.assertNull("Literal should be null", projected.literal());
}
@@ -206,9 +206,9 @@ public class TestProjection {
"id", projected.ref().name());
Assert.assertEquals("Operation should match", bound.op(), projected.op());
- if (bound.literal() != null) {
+ if (bound.isLiteralPredicate()) {
Assert.assertEquals("Literal should be equal",
- bound.literal().value(), projected.literal().value());
+ bound.asLiteralPredicate().literal().value(), projected.literal().value());
} else {
Assert.assertNull("Literal should be null", projected.literal());
}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
new file mode 100644
index 0000000..38aa991
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.expressions.Binder;
+import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.Evaluator;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.False;
+import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+public class TestStartsWith {
+
+ private static final String COLUMN = "someStringCol";
+ private static final Schema SCHEMA = new Schema(optional(1, COLUMN, Types.StringType.get()));
+
+ @Test
+ public void testTruncateProjections() {
+ PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build();
+
+ assertProjectionInclusive(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+ assertProjectionInclusive(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.STARTS_WITH);
+ assertProjectionInclusive(spec, startsWith(COLUMN, "ababab"), "abab", Expression.Operation.STARTS_WITH);
+
+ assertProjectionStrict(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+ assertProjectionStrict(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.EQ);
+
+ Expression projection = Projections.strict(spec).project(startsWith(COLUMN, "ababab"));
+ Assert.assertTrue(projection instanceof False);
+ }
+
+ @Test
+ public void testTruncateString() {
+ Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2);
+ Expression expr = startsWith(COLUMN, "abcde");
+ BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);
+
+ UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr);
+ Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);
+
+ Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true",
+ evaluator.eval(TestHelpers.Row.of("abcde")));
+ }
+
+ private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
+ String expectedLiteral, Expression.Operation expectedOp) {
+ Expression projection = Projections.inclusive(spec).project(filter);
+ assertProjection(spec, expectedLiteral, projection, expectedOp);
+ }
+
+ private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
+ String expectedLiteral, Expression.Operation expectedOp) {
+ Expression projection = Projections.strict(spec).project(filter);
+ assertProjection(spec, expectedLiteral, projection, expectedOp);
+ }
+
+ private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection,
+ Expression.Operation expectedOp) {
+ UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
+ Literal literal = predicate.literal();
+ Truncate<CharSequence> transform = (Truncate<CharSequence>) spec.getFieldsBySourceId(1).get(0).transform();
+ String output = transform.toHumanString((String) literal.value());
+
+ Assert.assertEquals(expectedOp, predicate.op());
+ Assert.assertEquals(expectedLiteral, output);
+ }
+}
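TestStartsWith pins down how startsWith interacts with a truncate(width) partition: inclusively, a prefix longer than the width can only be projected onto its first width characters; strictly, a prefix equal to the width tightens to EQ, and a longer prefix can never be guaranteed by a shorter partition value, so the strict projection is false. A short sketch restating the inclusive case from the test above:

    // Inclusive projection under truncate(4): only the first 4 characters
    // of the prefix survive, so the projected filter is a weaker superset.
    PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build();
    Expression proj = Projections.inclusive(spec).project(startsWith(COLUMN, "ababab"));
    // proj is startsWith on the partition value with "abab": every row whose
    // prefix is "ababab" lands in a partition whose value starts with "abab".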
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java
index 57aefca..ed71a8b 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java
@@ -20,6 +20,7 @@
package org.apache.iceberg.transforms;
import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
@@ -82,4 +83,25 @@ public class TestTimestamps {
Assert.assertEquals("Should produce \"null\" for null",
"null", Transforms.hour(type).toHumanString(null));
}
+
+ @Test
+ public void testTimestampsReturnType() {
+ Types.TimestampType type = Types.TimestampType.withZone();
+
+ Transform<Integer, Integer> year = Transforms.year(type);
+ Type yearResultType = year.getResultType(type);
+ Assert.assertEquals(Types.IntegerType.get(), yearResultType);
+
+ Transform<Integer, Integer> month = Transforms.month(type);
+ Type monthResultType = month.getResultType(type);
+ Assert.assertEquals(Types.IntegerType.get(), monthResultType);
+
+ Transform<Integer, Integer> day = Transforms.day(type);
+ Type dayResultType = day.getResultType(type);
+ Assert.assertEquals(Types.DateType.get(), dayResultType);
+
+ Transform<Integer, Integer> hour = Transforms.hour(type);
+ Type hourResultType = hour.getResultType(type);
+ Assert.assertEquals(Types.IntegerType.get(), hourResultType);
+ }
}
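The new testTimestampsReturnType assertions document an asymmetry worth noting: year, month, and hour produce integer ordinals, while day produces a DateType value, so day partitions render as calendar dates rather than counts. In outline:

    // Result types asserted above:
    //   year(ts), month(ts), hour(ts) -> IntegerType (ordinal since epoch)
    //   day(ts)                       -> DateType    (renders as yyyy-MM-dd)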
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java
index 5d3c5b0..787ec1e 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java
@@ -43,7 +43,7 @@ public class TestTimestampsProjection {
private static final Schema SCHEMA = new Schema(optional(1, "timestamp", TYPE));
public void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp, String expectedLiteral) {
+ Expression.Operation expectedOp, String expectedLiteral) {
Expression projection = Projections.strict(spec).project(filter);
UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
@@ -57,21 +57,21 @@ public class TestTimestampsProjection {
}
public void assertProjectionStrictValue(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp) {
+ Expression.Operation expectedOp) {
Expression projection = Projections.strict(spec).project(filter);
Assert.assertEquals(projection.op(), expectedOp);
}
public void assertProjectionInclusiveValue(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp) {
+ Expression.Operation expectedOp) {
Expression projection = Projections.inclusive(spec).project(filter);
Assert.assertEquals(projection.op(), expectedOp);
}
public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
- Expression.Operation expectedOp, String expectedLiteral) {
+ Expression.Operation expectedOp, String expectedLiteral) {
Expression projection = Projections.inclusive(spec).project(filter);
UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
@@ -88,10 +88,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-11");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018-01");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-11");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -101,10 +101,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-31T23:59:59.999999").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018-01");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2018-01");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2018-01");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-12");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -140,10 +140,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12-01");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12-01");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12-01");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-11-30");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -153,10 +153,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-01T23:59:59.999999").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12-01");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12-02");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12-01");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-12-01");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -192,10 +192,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-01-01T00:00:00.00000").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2016");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2016");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2016");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -205,10 +205,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-31T23:59:59.999999").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2016");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2018");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2018");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -244,10 +244,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-01T10:00:00.00000").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-10");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12-01-10");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12-01-10");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12-01-10");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-12-01-09");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01-10");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
@@ -257,10 +257,10 @@ public class TestTimestampsProjection {
Long date = (long) Literal.of("2017-12-01T10:59:59.999999").to(TYPE).value();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("timestamp").build();
- assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09");
- assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-10");
- assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11");
- assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11");
+ assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT, "2017-12-01-10");
+ assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT, "2017-12-01-11");
+ assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT, "2017-12-01-10");
+ assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT, "2017-12-01-10");
assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01-10");
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE);
}
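Across TestTimestampsProjection the expected strict projections change form, not meaning: a bound such as ts < 2017-12-01T00:00 was previously expressed against the adjacent partition (month <= "2017-11") and is now expressed against the predicate's own partition (month < "2017-12"). Both forms select the same partitions; expressing the bound against the same partition avoids computing neighboring partition values at the edges. A worked month-granularity example matching the first hunk:

    // timestamp < 2017-12-01T00:00:00
    //   old strict projection: month(ts) <= "2017-11"
    //   new strict projection: month(ts) <  "2017-12"
    // Either way, partition 2017-12 is excluded (it may contain rows at or
    // after the bound) while 2017-11 and earlier are kept.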
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java
deleted file mode 100644
index 0b2a945..0000000
--- a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.iceberg.transforms;
-
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.TestHelpers;
-import org.apache.iceberg.types.Types;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class TestTransformSerialization {
- @Test
- public void testTransforms() throws Exception {
- Schema schema = new Schema(
- Types.NestedField.required(1, "i", Types.IntegerType.get()),
- Types.NestedField.required(2, "l", Types.LongType.get()),
- Types.NestedField.required(3, "d", Types.DateType.get()),
- Types.NestedField.required(4, "t", Types.TimeType.get()),
- Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()),
- Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)),
- Types.NestedField.required(7, "s", Types.StringType.get()),
- Types.NestedField.required(8, "u", Types.UUIDType.get()),
- Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)),
- Types.NestedField.required(10, "b", Types.BinaryType.get())
- );
-
- // a spec with all of the allowed transform/type pairs
- PartitionSpec[] specs = new PartitionSpec[] {
- PartitionSpec.builderFor(schema).identity("i").build(),
- PartitionSpec.builderFor(schema).identity("l").build(),
- PartitionSpec.builderFor(schema).identity("d").build(),
- PartitionSpec.builderFor(schema).identity("t").build(),
- PartitionSpec.builderFor(schema).identity("ts").build(),
- PartitionSpec.builderFor(schema).identity("dec").build(),
- PartitionSpec.builderFor(schema).identity("s").build(),
- PartitionSpec.builderFor(schema).identity("u").build(),
- PartitionSpec.builderFor(schema).identity("f").build(),
- PartitionSpec.builderFor(schema).identity("b").build(),
- PartitionSpec.builderFor(schema).bucket("i", 128).build(),
- PartitionSpec.builderFor(schema).bucket("l", 128).build(),
- PartitionSpec.builderFor(schema).bucket("d", 128).build(),
- PartitionSpec.builderFor(schema).bucket("t", 128).build(),
- PartitionSpec.builderFor(schema).bucket("ts", 128).build(),
- PartitionSpec.builderFor(schema).bucket("dec", 128).build(),
- PartitionSpec.builderFor(schema).bucket("s", 128).build(),
- PartitionSpec.builderFor(schema).bucket("u", 128).build(),
- PartitionSpec.builderFor(schema).bucket("f", 128).build(),
- PartitionSpec.builderFor(schema).bucket("b", 128).build(),
- PartitionSpec.builderFor(schema).year("d").build(),
- PartitionSpec.builderFor(schema).month("d").build(),
- PartitionSpec.builderFor(schema).day("d").build(),
- PartitionSpec.builderFor(schema).year("ts").build(),
- PartitionSpec.builderFor(schema).month("ts").build(),
- PartitionSpec.builderFor(schema).day("ts").build(),
- PartitionSpec.builderFor(schema).hour("ts").build(),
- PartitionSpec.builderFor(schema).truncate("i", 10).build(),
- PartitionSpec.builderFor(schema).truncate("l", 10).build(),
- PartitionSpec.builderFor(schema).truncate("dec", 10).build(),
- PartitionSpec.builderFor(schema).truncate("s", 10).build(),
- };
-
- for (PartitionSpec spec : specs) {
- Assert.assertEquals("Deserialization should produce equal partition spec",
- spec, TestHelpers.roundTripSerialize(spec));
- }
- }
-}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesProjection.java
new file mode 100644
index 0000000..a10b649
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesProjection.java
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.equal;
+import static org.apache.iceberg.expressions.Expressions.greaterThan;
+import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.lessThan;
+import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+public class TestTruncatesProjection {
+
+ public void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
+ Expression.Operation expectedOp, String expectedLiteral) {
+
+ Expression projection = Projections.strict(spec).project(filter);
+ UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
+
+ Assert.assertEquals(expectedOp, predicate.op());
+
+ Literal literal = predicate.literal();
+ Truncate transform = (Truncate) spec.getFieldsBySourceId(1).get(0).transform();
+ String output = transform.toHumanString(literal.value());
+ Assert.assertEquals(expectedLiteral, output);
+ }
+
+ public void assertProjectionStrictValue(PartitionSpec spec, UnboundPredicate<?> filter,
+ Expression.Operation expectedOp) {
+
+ Expression projection = Projections.strict(spec).project(filter);
+ Assert.assertEquals(projection.op(), expectedOp);
+ }
+
+ public void assertProjectionInclusiveValue(PartitionSpec spec, UnboundPredicate<?> filter,
+ Expression.Operation expectedOp) {
+
+ Expression projection = Projections.inclusive(spec).project(filter);
+ Assert.assertEquals(projection.op(), expectedOp);
+ }
+
+ public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
+ Expression.Operation expectedOp, String expectedLiteral) {
+ Expression projection = Projections.inclusive(spec).project(filter);
+ UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
+
+ Assert.assertEquals(predicate.op(), expectedOp);
+
+ Literal literal = predicate.literal();
+ Truncate transform = (Truncate) spec.getFieldsBySourceId(1).get(0).transform();
+ String output = transform.toHumanString(literal.value());
+ Assert.assertEquals(expectedLiteral, output);
+ }
+
+ @Test
+ public void testIntegerStrictLowerBound() {
+ Integer value = 100;
+ Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "100");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "100");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "100");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testIntegerStrictUpperBound() {
+ Integer value = 99;
+ Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "90");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "90");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "90");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testIntegerInclusiveLowerBound() {
+ Integer value = 100;
+ Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "90");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "100");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "100");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "100");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testIntegerInclusiveUpperBound() {
+ Integer value = 99;
+ Schema schema = new Schema(optional(1, "value", Types.IntegerType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "90");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "90");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "90");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "90");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testLongStrictLowerBound() {
+ Long value = 100L;
+ Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "100");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "100");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "100");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testLongStrictUpperBound() {
+ Long value = 99L;
+ Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "90");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "90");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "90");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testLongInclusiveLowerBound() {
+ Long value = 100L;
+ Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "90");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "100");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "100");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "100");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testLongInclusiveUpperBound() {
+ Long value = 99L;
+ Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "90");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "90");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "90");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "90");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testDecimalStrictLowerBound() {
+ Types.DecimalType type = Types.DecimalType.of(9, 2);
+ BigDecimal value = (BigDecimal) Literal.of("100.00").to(type).value();
+ Schema schema = new Schema(optional(1, "value", type));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "100.00");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100.00");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "100.00");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "99.90");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "100.00");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testDecimalStrictUpperBound() {
+ Types.DecimalType type = Types.DecimalType.of(9, 2);
+ BigDecimal value = (BigDecimal) Literal.of("99.99").to(type).value();
+ Schema schema = new Schema(optional(1, "value", type));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "99.90");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100.00");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "99.90");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "99.90");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "99.90");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testDecimalInclusiveLowerBound() {
+ Types.DecimalType type = Types.DecimalType.of(9, 2);
+ BigDecimal value = (BigDecimal) Literal.of("100.00").to(type).value();
+ Schema schema = new Schema(optional(1, "value", type));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "99.90");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "100.00");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100.00");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "100.00");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "100.00");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testDecimalInclusiveUpperBound() {
+ Types.DecimalType type = Types.DecimalType.of(9, 2);
+ BigDecimal value = (BigDecimal) Literal.of("99.99").to(type).value();
+ Schema schema = new Schema(optional(1, "value", type));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "99.90");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "99.90");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "100.00");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "99.90");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "99.90");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testStringStrict() {
+ String value = "abcdefg";
+ Schema schema = new Schema(optional(1, "value", Types.StringType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "abcde");
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "abcde");
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "abcde");
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "abcde");
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "abcde");
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testStringInclusive() {
+ String value = "abcdefg";
+ Schema schema = new Schema(optional(1, "value", Types.StringType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, "abcde");
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, "abcde");
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, "abcde");
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, "abcde");
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, "abcde");
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testBinaryStrict() throws Exception {
+ ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
+ Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
+ String expectedValue = TransformUtil.base64encode(ByteBuffer.wrap("abcde".getBytes("UTF-8")));
+
+ assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, expectedValue);
+ assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, expectedValue);
+ assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, expectedValue);
+ assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, expectedValue);
+ assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, expectedValue);
+ assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
+ }
+
+ @Test
+ public void testBinaryInclusive() throws Exception {
+ ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
+ Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
+ String expectedValue = TransformUtil.base64encode(ByteBuffer.wrap("abcde".getBytes("UTF-8")));
+
+ assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, expectedValue);
+ assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, expectedValue);
+ assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, expectedValue);
+ assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, expectedValue);
+ assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, expectedValue);
+ assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);
+ }
+}
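The integer cases above reduce to simple truncation arithmetic; spelling out the strict lower-bound case (width 10, value 100) may help when reading the rest:

    // truncate(10): 100 -> partition 100, 99 -> partition 90.
    // A strict projection may only match partitions whose every row
    // satisfies the original predicate:
    //   value >= 100 -> partition > 90   (100, 110, ... hold only rows >= 100)
    //   value <  100 -> partition < 100  (90 and below hold only rows <= 99)
    //   value == 100 -> FALSE            (partition 100 also holds 101..109)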
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
new file mode 100644
index 0000000..f2160d5
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ResidualEvaluator;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.equal;
+import static org.apache.iceberg.expressions.Expressions.greaterThan;
+import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.lessThan;
+import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+
+public class TestTruncatesResiduals {
+
+ /**
+ * Test helper method to compute the residual for a given partitionValue against a predicate
+ * and assert that the resulting residual expression's operation is the expectedOp
+ *
+ * @param spec the partition spec
+ * @param predicate predicate to calculate the residual against
+ * @param partitionValue value of the partition to check the residual for
+ * @param expectedOp expected operation to assert against
+ * @param <T> Type parameter of partitionValue
+ */
+ public <T> void assertResidualValue(PartitionSpec spec, UnboundPredicate<?> predicate,
+ T partitionValue, Expression.Operation expectedOp) {
+ ResidualEvaluator resEval = ResidualEvaluator.of(spec, predicate, true);
+ Expression residual = resEval.residualFor(TestHelpers.Row.of(partitionValue));
+
+ Assert.assertEquals(expectedOp, residual.op());
+ }
+
+ /**
+ * Test helper method to compute the residual for a given partitionValue against a predicate
+ * and assert that the resulting expression is the same as the original predicate
+ *
+ * @param spec the partition spec
+ * @param predicate predicate to calculate the residual against
+ * @param partitionValue value of the partition to check the residual for
+ * @param <T> Type parameter of partitionValue
+ */
+ public <T> void assertResidualPredicate(PartitionSpec spec,
+ UnboundPredicate<?> predicate, T partitionValue) {
+ ResidualEvaluator resEval = ResidualEvaluator.of(spec, predicate, true);
+ Expression residual = resEval.residualFor(TestHelpers.Row.of(partitionValue));
+
+ UnboundPredicate<?> unbound = assertAndUnwrapUnbound(residual);
+ Assert.assertEquals(predicate.op(), unbound.op());
+ Assert.assertEquals(predicate.ref().name(), unbound.ref().name());
+ Assert.assertEquals(predicate.literal().value(), unbound.literal().value());
+ }
+
+ @Test
+ public void testIntegerTruncateTransformResiduals() {
+ Schema schema = new Schema(Types.NestedField.optional(50, "value", Types.IntegerType.get()));
+ // valid partition values would be 0, 10, 20, ..., 90, 100, etc.
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();
+
+ // less than lower bound
+ assertResidualValue(spec, lessThan("value", 100), 110, Expression.Operation.FALSE);
+ assertResidualValue(spec, lessThan("value", 100), 100, Expression.Operation.FALSE);
+ assertResidualValue(spec, lessThan("value", 100), 90, Expression.Operation.TRUE);
+ // less than upper bound
+ assertResidualValue(spec, lessThan("value", 99), 100, Expression.Operation.FALSE);
+ assertResidualPredicate(spec, lessThan("value", 99), 90);
+ assertResidualValue(spec, lessThan("value", 99), 80, Expression.Operation.TRUE);
+
+ // less than equals lower bound
+ assertResidualValue(spec, lessThanOrEqual("value", 100), 110, Expression.Operation.FALSE);
+ assertResidualPredicate(spec, lessThanOrEqual("value", 100), 100);
+ assertResidualValue(spec, lessThanOrEqual("value", 100), 90, Expression.Operation.TRUE);
+ // less than equals upper bound
+ assertResidualValue(spec, lessThanOrEqual("value", 99), 100, Expression.Operation.FALSE);
+ assertResidualValue(spec, lessThanOrEqual("value", 99), 90, Expression.Operation.TRUE);
+ assertResidualValue(spec, lessThanOrEqual("value", 99), 80, Expression.Operation.TRUE);
+
+ // greater than lower bound
+ assertResidualValue(spec, greaterThan("value", 100), 110, Expression.Operation.TRUE);
+ assertResidualPredicate(spec, greaterThan("value", 100), 100);
+ assertResidualValue(spec, greaterThan("value", 100), 90, Expression.Operation.FALSE);
+ // greater than upper bound
+ assertResidualValue(spec, greaterThan("value", 99), 100, Expression.Operation.TRUE);
+ assertResidualValue(spec, greaterThan("value", 99), 90, Expression.Operation.FALSE);
+ assertResidualValue(spec, greaterThan("value", 99), 80, Expression.Operation.FALSE);
+
+ // greater than equals lower bound
+ assertResidualValue(spec, greaterThanOrEqual("value", 100), 110, Expression.Operation.TRUE);
+ assertResidualValue(spec, greaterThanOrEqual("value", 100), 100, Expression.Operation.TRUE);
+ assertResidualValue(spec, greaterThanOrEqual("value", 100), 90, Expression.Operation.FALSE);
+ // greater than equals upper bound
+ assertResidualValue(spec, greaterThanOrEqual("value", 99), 100, Expression.Operation.TRUE);
+ assertResidualPredicate(spec, greaterThanOrEqual("value", 99), 90);
+ assertResidualValue(spec, greaterThanOrEqual("value", 99), 80, Expression.Operation.FALSE);
+
+ // equal lower bound
+ assertResidualValue(spec, equal("value", 100), 110, Expression.Operation.FALSE);
+ assertResidualPredicate(spec, equal("value", 100), 100);
+ assertResidualValue(spec, equal("value", 100), 90, Expression.Operation.FALSE);
+ // equal upper bound
+ assertResidualValue(spec, equal("value", 99), 100, Expression.Operation.FALSE);
+ assertResidualPredicate(spec, equal("value", 99), 90);
+ assertResidualValue(spec, equal("value", 99), 80, Expression.Operation.FALSE);
+
+ // not equal lower bound
+ assertResidualValue(spec, notEqual("value", 100), 110, Expression.Operation.TRUE);
+ assertResidualPredicate(spec, notEqual("value", 100), 100);
+ assertResidualValue(spec, notEqual("value", 100), 90, Expression.Operation.TRUE);
+ // not equal upper bound
+ assertResidualValue(spec, notEqual("value", 99), 100, Expression.Operation.TRUE);
+ assertResidualPredicate(spec, notEqual("value", 99), 90);
+ assertResidualValue(spec, notEqual("value", 99), 80, Expression.Operation.TRUE);
+ }
+
+ @Test
+ public void testStringTruncateTransformResiduals() {
+ Schema schema = new Schema(Types.NestedField.optional(50, "value", Types.StringType.get()));
+ // valid partition values would be two-letter strings, e.g. ab, bc, etc.
+ PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 2).build();
+
+ // less than
+ assertResidualValue(spec, lessThan("value", "bcd"), "ab", Expression.Operation.TRUE);
+ assertResidualPredicate(spec, lessThan("value", "bcd"), "bc");
+ assertResidualValue(spec, lessThan("value", "bcd"), "cd", Expression.Operation.FALSE);
+
+ // less than equals
+ assertResidualValue(spec, lessThanOrEqual("value", "bcd"), "ab", Expression.Operation.TRUE);
+ assertResidualPredicate(spec, lessThanOrEqual("value", "bcd"), "bc");
+ assertResidualValue(spec, lessThanOrEqual("value", "bcd"), "cd", Expression.Operation.FALSE);
+
+ // greater than
+ assertResidualValue(spec, greaterThan("value", "bcd"), "ab", Expression.Operation.FALSE);
+ assertResidualPredicate(spec, greaterThan("value", "bcd"), "bc");
+ assertResidualValue(spec, greaterThan("value", "bcd"), "cd", Expression.Operation.TRUE);
+
+ // greater than equals
+ assertResidualValue(spec, greaterThanOrEqual("value", "bcd"), "ab", Expression.Operation.FALSE);
+ assertResidualPredicate(spec, greaterThanOrEqual("value", "bcd"), "bc");
+ assertResidualValue(spec, greaterThanOrEqual("value", "bcd"), "cd", Expression.Operation.TRUE);
+
+ // equal
+ assertResidualValue(spec, equal("value", "bcd"), "ab", Expression.Operation.FALSE);
+ assertResidualPredicate(spec, equal("value", "bcd"), "bc");
+ assertResidualValue(spec, equal("value", "bcd"), "cd", Expression.Operation.FALSE);
+
+ // not equal
+ assertResidualValue(spec, notEqual("value", "bcd"), "ab", Expression.Operation.TRUE);
+ assertResidualPredicate(spec, notEqual("value", "bcd"), "bc");
+ assertResidualValue(spec, notEqual("value", "bcd"), "cd", Expression.Operation.TRUE);
+
+ // starts with
+ assertResidualValue(spec, startsWith("value", "bcd"), "ab", Expression.Operation.FALSE);
+ assertResidualPredicate(spec, startsWith("value", "bcd"), "bc");
+ assertResidualValue(spec, startsWith("value", "bcd"), "cd", Expression.Operation.FALSE);
+ }
+}
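A residual is what remains of a predicate once the partition value is known: FALSE when no row in the partition can match, TRUE when every row must match, and the original predicate when the partition straddles the bound and rows must be checked individually. The first integer hunk exercises all three outcomes:

    // Residual of (value < 99) under truncate(10), by partition value:
    //   partition 100 (rows 100..109) -> FALSE       (no row matches)
    //   partition  90 (rows  90..99)  -> value < 99  (re-check each row)
    //   partition  80 (rows  80..89)  -> TRUE        (every row matches)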
diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java
index 78c4c71..fce56d8 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java
@@ -24,6 +24,7 @@ import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
+import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.BooleanType;
import org.apache.iceberg.types.Types.DateType;
@@ -47,40 +48,66 @@ public class TestConversions {
// booleans are stored as 0x00 for 'false' and a non-zero byte for 'true'
assertConversion(false, BooleanType.get(), new byte[]{0x00});
assertConversion(true, BooleanType.get(), new byte[]{0x01});
+ Assert.assertArrayEquals(new byte[]{0x00}, Literal.of(false).toByteBuffer().array());
+ Assert.assertArrayEquals(new byte[]{0x01}, Literal.of(true).toByteBuffer().array());
+
// integers are stored as 4 bytes in little-endian order
// 84202 is 0...01|01001000|11101010 in binary
// 11101010 -> -22, 01001000 -> 72, 00000001 -> 1, 00000000 -> 0
assertConversion(84202, IntegerType.get(), new byte[]{-22, 72, 1, 0});
+ Assert.assertArrayEquals(new byte[]{-22, 72, 1, 0}, Literal.of(84202).toByteBuffer().array());
+
// longs are stored as 8 bytes in little-endian order
// 200L is 0...0|11001000 in binary
// 11001000 -> -56, 00000000 -> 0, ... , 00000000 -> 0
assertConversion(200L, LongType.get(), new byte[]{-56, 0, 0, 0, 0, 0, 0, 0});
+ Assert.assertArrayEquals(new byte[]{-56, 0, 0, 0, 0, 0, 0, 0}, Literal.of(200L).toByteBuffer().array());
+
// floats are stored as 4 bytes in little-endian order
// floating point numbers are represented as sign * 2ˆexponent * mantissa
// -4.5F is -1 * 2ˆ2 * 1.125 and encoded as 11000000|10010000|0...0 in binary
// 00000000 -> 0, 00000000 -> 0, 10010000 -> -112, 11000000 -> -64,
assertConversion(-4.5F, FloatType.get(), new byte[]{0, 0, -112, -64});
+ Assert.assertArrayEquals(new byte[]{0, 0, -112, -64}, Literal.of(-4.5F).toByteBuffer().array());
+
// doubles are stored as 8 bytes in little-endian order
// floating point numbers are represented as sign * 2ˆexponent * mantissa
// 6.0 is 1 * 2ˆ4 * 1.5 and encoded as 01000000|00011000|0...0
// 00000000 -> 0, ... , 00011000 -> 24, 01000000 -> 64
assertConversion(6.0, DoubleType.get(), new byte[]{0, 0, 0, 0, 0, 0, 24, 64});
+ Assert.assertArrayEquals(new byte[]{0, 0, 0, 0, 0, 0, 24, 64}, Literal.of(6.0).toByteBuffer().array());
+
// dates are stored as days from 1970-01-01 in a 4-byte little-endian int
// 1000 is 0...0|00000011|11101000 in binary
// 11101000 -> -24, 00000011 -> 3, ... , 00000000 -> 0
assertConversion(1000, DateType.get(), new byte[]{-24, 3, 0, 0});
+ Assert.assertArrayEquals(new byte[]{-24, 3, 0, 0}, Literal.of(1000).to(DateType.get()).toByteBuffer().array());
+
// time is stored as microseconds from midnight in an 8-byte little-endian long
// 10000L is 0...0|00100111|00010000 in binary
// 00010000 -> 16, 00100111 -> 39, ... , 00000000 -> 0
assertConversion(10000L, TimeType.get(), new byte[]{16, 39, 0, 0, 0, 0, 0, 0});
+ Assert.assertArrayEquals(
+ new byte[]{16, 39, 0, 0, 0, 0, 0, 0},
+ Literal.of(10000L).to(TimeType.get()).toByteBuffer().array());
+
// timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian long
// 400000L is 0...110|00011010|10000000 in binary
// 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0
assertConversion(400000L, TimestampType.withoutZone(), new byte[]{-128, 26, 6, 0, 0, 0, 0, 0});
assertConversion(400000L, TimestampType.withZone(), new byte[]{-128, 26, 6, 0, 0, 0, 0, 0});
+ Assert.assertArrayEquals(
+ new byte[]{-128, 26, 6, 0, 0, 0, 0, 0},
+ Literal.of(400000L).to(TimestampType.withoutZone()).toByteBuffer().array());
+ Assert.assertArrayEquals(
+ new byte[]{-128, 26, 6, 0, 0, 0, 0, 0},
+ Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array());
+
// strings are stored as UTF-8 bytes (without length)
// 'A' -> 65, 'B' -> 66, 'C' -> 67
assertConversion(CharBuffer.wrap("ABC"), StringType.get(), new byte[]{65, 66, 67});
+ Assert.assertArrayEquals(new byte[]{65, 66, 67}, Literal.of("ABC").toByteBuffer().array());
+
// uuids are stored as 16-byte big-endian values
// f79c3e09-677c-4bbd-a479-3f349cb785e7 is encoded as F7 9C 3E 09 67 7C 4B BD A4 79 3F 34 9C B7 85 E7
// 0xF7 -> 11110111 -> -9, 0x9C -> 10011100 -> -100, 0x3E -> 00111110 -> 62,
@@ -93,18 +120,30 @@ public class TestConversions {
UUID.fromString("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
UUIDType.get(),
new byte[]{-9, -100, 62, 9, 103, 124, 75, -67, -92, 121, 63, 52, -100, -73, -123, -25});
+ Assert.assertArrayEquals(
+ new byte[]{-9, -100, 62, 9, 103, 124, 75, -67, -92, 121, 63, 52, -100, -73, -123, -25},
+ Literal.of(UUID.fromString("f79c3e09-677c-4bbd-a479-3f349cb785e7")).toByteBuffer().array());
+
// fixed values are stored directly
// 'a' -> 97, 'b' -> 98
assertConversion(
ByteBuffer.wrap("ab".getBytes(StandardCharsets.UTF_8)),
FixedType.ofLength(2),
new byte[]{97, 98});
+ Assert.assertArrayEquals(
+ new byte[]{97, 98},
+ Literal.of("ab".getBytes(StandardCharsets.UTF_8)).toByteBuffer().array());
+
// binary values are stored directly
// 'Z' -> 90
assertConversion(
ByteBuffer.wrap("Z".getBytes(StandardCharsets.UTF_8)),
BinaryType.get(),
new byte[]{90});
+ Assert.assertArrayEquals(
+ new byte[]{90},
+ Literal.of(ByteBuffer.wrap("Z".getBytes(StandardCharsets.UTF_8))).toByteBuffer().array());
+
// decimals are stored as unscaled values in the form of two's-complement big-endian binary,
// using the minimum number of bytes for the values
// 345 is 0...1|01011001 in binary
@@ -113,6 +152,9 @@ public class TestConversions {
new BigDecimal("3.45"),
DecimalType.of(3, 2),
new byte[]{1, 89});
+ Assert.assertArrayEquals(
+ new byte[]{1, 89},
+ Literal.of(new BigDecimal("3.45")).toByteBuffer().array());
}
private <T> void assertConversion(T value, Type type, byte[] expectedBinary) {
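The added assertions check the same single-value encodings through Literal.toByteBuffer(). A minimal round-trip sketch, assuming the Conversions.fromByteBuffer helper that assertConversion presumably exercises:

    // Assumed helper: Conversions.fromByteBuffer(Type, ByteBuffer).
    ByteBuffer buf = Literal.of(84202).toByteBuffer();  // {-22, 72, 1, 0}, little-endian
    int back = (Integer) Conversions.fromByteBuffer(Types.IntegerType.get(), buf);
    // back == 84202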
diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
index c2af6bf..cb5325d 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java
@@ -385,4 +385,26 @@ public class TestReadabilityChecks {
Assert.assertNotNull(schema.caseInsensitiveSelect("LoCaTiOnS.LaT").findField(1));
Assert.assertNotNull(schema.caseInsensitiveSelect("locations.LONG").findField(2));
}
+
+ @Test
+ public void testCheckNullabilityRequiredSchemaField() {
+ Schema write = new Schema(optional(1, "from_field", Types.IntegerType.get()));
+ Schema read = new Schema(required(1, "to_field", Types.IntegerType.get()));
+
+ List<String> errors = CheckCompatibility.typeCompatibilityErrors(read, write);
+ Assert.assertEquals("Should produce no error messages", 0, errors.size());
+ }
+
+ @Test
+ public void testCheckNullabilityRequiredStructField() {
+ Schema write = new Schema(required(0, "nested", Types.StructType.of(
+ optional(1, "from_field", Types.IntegerType.get())
+ )));
+ Schema read = new Schema(required(0, "nested", Types.StructType.of(
+ required(1, "to_field", Types.IntegerType.get())
+ )));
+
+ List<String> errors = CheckCompatibility.typeCompatibilityErrors(read, write);
+ Assert.assertEquals("Should produce no error messages", 0, errors.size());
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
index 82e0636..14247cb 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java
@@ -142,10 +142,10 @@ public class TestSerializableTypes {
Types.StringType.get(),
Types.StringType.get()
)),
- required(8, "complex_key_map", Types.MapType.ofOptional(20, 21,
+ required(20, "complex_key_map", Types.MapType.ofOptional(21, 22,
Types.StructType.of(
- required(22, "x", Types.LongType.get()),
- optional(23, "y", Types.LongType.get())),
+ required(23, "x", Types.LongType.get()),
+ optional(24, "y", Types.LongType.get())),
Types.StringType.get()))
);
diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
new file mode 100644
index 0000000..8d3d4b4
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+package org.apache.iceberg.types;
+
+import org.apache.iceberg.Schema;
+import org.junit.Test;
+
+import static org.apache.iceberg.types.Types.NestedField.required;
+
+
+public class TestTypeUtil {
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testReassignIdsIllegalArgumentException() {
+ Schema schema = new Schema(
+ required(1, "a", Types.IntegerType.get()),
+ required(2, "b", Types.IntegerType.get())
+ );
+ Schema sourceSchema = new Schema(
+ required(1, "a", Types.IntegerType.get())
+ );
+ TypeUtil.reassignIds(schema, sourceSchema);
+ }
+
+ @Test(expected = RuntimeException.class)
+ public void testValidateSchemaViaIndexByName() {
+ Types.NestedField nestedType = Types.NestedField
+ .required(1, "a", Types.StructType.of(
+ required(2, "b", Types.StructType.of(
+ required(3, "c", Types.BooleanType.get())
+ )),
+ required(4, "b.c", Types.BooleanType.get())
+ )
+ );
+
+ TypeUtil.indexByName(Types.StructType.of(nestedType));
+ }
+}
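The second test relies on indexByName rejecting schemas where a literal field name like "b.c"
collides with the dotted path of a nested field. On a collision-free schema the same call succeeds
and yields a name-to-id index; a small sketch:

import java.util.Map;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.required;

public class IndexByNameSketch {
  public static void main(String[] args) {
    // no top-level "b.c" field here, so every dotted path is unambiguous
    Map<String, Integer> index = TypeUtil.indexByName(Types.StructType.of(
        required(1, "a", Types.StructType.of(
            required(2, "b", Types.StructType.of(
                required(3, "c", Types.BooleanType.get())))))));
    System.out.println(index); // expected along the lines of {a=1, a.b=2, a.b.c=3}
  }
}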
diff --git a/baseline.gradle b/baseline.gradle
new file mode 100644
index 0000000..d0c122a
--- /dev/null
+++ b/baseline.gradle
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+apply plugin: 'com.palantir.baseline-config'
+
+allprojects {
+ apply plugin: 'com.palantir.baseline-idea'
+}
+
+subprojects {
+ // Currently, if any subproject applies the blanket Baseline plugin, it forces the Baseline plugin
+ // to be applied to ALL projects, and we are not yet prepared to address all of the build errors
+ // that would result. Furthermore, baseline-format will not work out of the box for us - see below.
+
+ // Instead, we apply the individual Baseline plugins to each project that is ready for lint
+ // enforcement.
+ apply plugin: 'org.inferred.processors'
+ apply plugin: 'com.palantir.baseline-checkstyle'
+ apply plugin: 'com.palantir.baseline-error-prone'
+ apply plugin: 'com.palantir.baseline-scalastyle'
+ apply plugin: 'com.palantir.baseline-class-uniqueness'
+ apply plugin: 'com.palantir.baseline-reproducibility'
+ apply plugin: 'com.palantir.baseline-exact-dependencies'
+ apply plugin: 'com.palantir.baseline-release-compatibility'
+
+ // Can't use the built-in Baseline spotless format because it insists on placing static imports
+ // after non-static imports, and this cannot be overridden.
+
+ // So we apply Spotless manually to get a similar effect to baseline-format, but change the
+ // import order.
+ pluginManager.withPlugin('com.diffplug.gradle.spotless') {
+ spotless {
+ java {
+ target 'src/main/java/**/*.java', 'src/test/java/**/*.java'
+ removeUnusedImports()
+ importOrder(['', 'static '])
+ trimTrailingWhitespace()
+ indentWithSpaces 2
+ endWithNewline()
+ }
+ }
+ }
+
+ tasks.withType(JavaCompile).configureEach {
+ options.errorprone.errorproneArgs += [
+ // error-prone is slow, don't run on tests
+ '-XepExcludedPaths:.*/test/.*',
+ // specific to Palantir
+ '-Xep:PreferSafeLoggingPreconditions:OFF',
+ '-Xep:PreferSafeLoggableExceptions:OFF',
+ '-Xep:Slf4jLogsafeArgs:OFF',
+ // subclasses are not equal
+ '-Xep:EqualsGetClass:OFF',
+ // patterns that are allowed
+ '-Xep:SwitchStatementDefaultCase:OFF',
+ '-Xep:MissingCasesInEnumSwitch:OFF',
+ '-Xep:TypeParameterShadowing:OFF',
+ '-Xep:TypeParameterUnusedInFormals:OFF',
+ ]
+ }
+}
diff --git a/build.gradle b/build.gradle
index 9071a61..57bbd87 100644
--- a/build.gradle
+++ b/build.gradle
@@ -17,6 +17,8 @@
* under the License.
*/
+import groovy.transform.Memoized
+
buildscript {
repositories {
jcenter()
@@ -30,12 +32,10 @@ buildscript {
classpath 'com.diffplug.spotless:spotless-plugin-gradle:3.14.0'
classpath 'gradle.plugin.org.inferred:gradle-processors:2.1.0'
classpath 'me.champeau.gradle:jmh-gradle-plugin:0.4.8'
- classpath 'com.netflix.nebula:nebula-publishing-plugin:9.5.0'
}
}
plugins {
- id 'nebula.netflixoss' version '4.1.0'
id 'com.palantir.git-version' version '0.9.1'
id 'com.palantir.consistent-versions' version '1.9.2'
}
@@ -46,8 +46,7 @@ if (JavaVersion.current() != JavaVersion.VERSION_1_8) {
allprojects {
group = "org.apache.iceberg"
- apply plugin: 'com.palantir.baseline-idea'
- version = gitVersion()
+ version = getProjectVersion()
repositories {
maven { url "http://palantir.bintray.com/releases" }
mavenCentral()
@@ -55,17 +54,16 @@ allprojects {
}
}
-apply plugin: 'com.palantir.baseline-config'
-
subprojects {
- apply plugin: 'nebula.javadoc-jar'
- apply plugin: 'nebula.source-jar'
apply plugin: 'java'
- apply plugin: 'maven' // make pom files for deployment
- apply plugin: 'nebula.maven-base-publish'
- artifacts {
- archives sourceJar
+ configurations {
+ testCompile.extendsFrom compileOnly
+ all {
+ exclude group: 'org.slf4j', module: 'slf4j-log4j12'
+ }
+
+ testArtifacts
}
compileJava {
@@ -76,15 +74,6 @@ subprojects {
options.encoding = "UTF-8"
}
- configurations {
- testCompile.extendsFrom compileOnly
- all {
- exclude group: 'org.slf4j', module: 'slf4j-log4j12'
- }
-
- testArtifacts
- }
-
ext {
jmhVersion = '1.21'
}
@@ -94,130 +83,22 @@ subprojects {
dependencies {
compile 'org.slf4j:slf4j-api'
- compile 'com.google.guava:guava'
+ compile('com.google.guava:guava') {
+ // may be LGPL - use ALv2 findbugs-annotations instead
+ exclude group: 'com.google.code.findbugs'
+ }
+ compile 'com.github.stephenc.findbugs:findbugs-annotations:1.3.9-1'
testCompile 'junit:junit'
testCompile 'org.slf4j:slf4j-simple'
testCompile 'org.mockito:mockito-core'
}
-
- task testJar(type: Jar){
- archiveClassifier = 'tests'
- from sourceSets.test.output
- }
-
- artifacts {
- testArtifacts testJar
- }
-
- publishing {
- publications {
- nebula(MavenPublication) {
- from components.java
- versionMapping {
- allVariants {
- fromResolutionResult()
- }
- }
- }
- }
- }
}
-task aggregateJavadoc(type: Javadoc) {
- // use the branch name in place of version in Javadoc
- project.rootProject.version = versionDetails().branchName
-
- dependsOn subprojects.javadoc
- source subprojects.javadoc.source
- destinationDir project.rootProject.file("site/docs/javadoc/${versionDetails().branchName}")
- classpath = project.rootProject.files(subprojects.javadoc.classpath)
-}
-
-task removeJavadoc(type: Exec) {
- commandLine 'rm', '-rf', "site/docs/javadoc/${versionDetails().branchName}"
-}
-
-task refreshJavadoc(type: Exec) {
- dependsOn aggregateJavadoc
- dependsOn removeJavadoc
- aggregateJavadoc.mustRunAfter removeJavadoc
- commandLine 'git', 'add', "site/docs/javadoc/${versionDetails().branchName}"
-}
-
-task deploySite(type: Exec) {
- workingDir 'site'
- commandLine 'mkdocs', 'gh-deploy'
-}
-
-// The following code is temporary to allow for incrementally migrating projects to use Baseline.
-
-// We enable baseline-idea everywhere so that everyone can use IntelliJ to build code against the
-// Baseline style guide.
-def baselineProjects = [ project("iceberg-api"), project("iceberg-common"), project("iceberg-core"),
- project("iceberg-data"), project("iceberg-orc"), project("iceberg-spark"),
- project("iceberg-hive") ]
-
-
-configure(subprojects - baselineProjects) {
- // error-prone is brought in with baseline-idea, but we're not prepared to handle error-prone
- // linting errors everywhere yet.
- pluginManager.withPlugin("com.palantir.baseline-error-prone") {
- tasks.withType(JavaCompile).configureEach { task ->
- options.errorprone.enabled = false
- }
- }
-}
-
-configure(baselineProjects) {
- // Currently, if any subproject applies the blanket Baseline plugin, it forces the Baseline plugin
- // to be applied to ALL projects. And we are not prepared to address all of the build errors that
- // occur as a result at this time. Furthermore, baseline-format will not work out of the box for
- // us - see below.
-
- // Thus we concede to applying all of the Baseline plugins individually on all the projects we are
- // ready to enforce linting on.
- apply plugin: 'org.inferred.processors'
- //apply plugin: 'com.palantir.baseline-checkstyle'
- apply plugin: 'com.palantir.baseline-scalastyle'
- apply plugin: 'com.palantir.baseline-class-uniqueness'
- apply plugin: 'com.palantir.baseline-reproducibility'
- apply plugin: 'com.palantir.baseline-exact-dependencies'
- apply plugin: 'com.palantir.baseline-release-compatibility'
-
- // Can't use the built-in Baseline spotless format because it's opinionated about the import
- // order of having static imports after non-static imports, and this cannot be overridden.
-
- // So we apply Spotless manually to get a similar effect to baseline-format, but change the
- // import order.
- pluginManager.withPlugin('com.diffplug.gradle.spotless') {
- spotless {
- java {
- target 'src/main/java/**/*.java', 'src/main/test/**/*.java'
- removeUnusedImports()
- importOrder(['', 'static '])
- trimTrailingWhitespace()
- indentWithSpaces 2
- endWithNewline()
- }
- }
- }
-}
-
-def jmhProjects = [ project("iceberg-spark") ]
-
-configure(jmhProjects) {
- apply plugin: 'me.champeau.gradle.jmh'
-
- jmh {
- jmhVersion = jmhVersion
- failOnError = true
- forceGC = true
- includeTests = true
- humanOutputFile = file(jmhOutputPath)
- include = [jmhIncludeRegex]
- }
-}
+apply from: 'baseline.gradle'
+apply from: 'deploy.gradle'
+apply from: 'tasks.gradle'
+apply from: 'jmh.gradle'
project(':iceberg-api') {
dependencies {
@@ -236,10 +117,6 @@ project(':iceberg-core') {
compile("org.apache.avro:avro") {
exclude group: 'org.tukaani' // xz compression is not supported
}
- compile("org.apache.arrow:arrow-vector") {
- exclude group: 'io.netty', module: 'netty-buffer'
- exclude group: 'io.netty', module: 'netty-common'
- }
compileOnly("org.apache.spark:spark-hive_2.11") {
exclude group: 'org.apache.avro', module: 'avro'
}
@@ -256,7 +133,6 @@ project(':iceberg-core') {
exclude group: 'io.netty', module: 'netty-buffer'
exclude group: 'io.netty', module: 'netty-common'
}
-
testCompile project(path: ':iceberg-api', configuration: 'testArtifacts')
}
}
@@ -265,15 +141,14 @@ project(':iceberg-data') {
dependencies {
compile project(':iceberg-api')
compile project(':iceberg-core')
- compileOnly project(':iceberg-spark')
compileOnly project(':iceberg-parquet')
- compileOnly("org.apache.spark:spark-hive_2.11") {
- exclude group: 'org.apache.avro', module: 'avro'
- }
+
testCompile("org.apache.hadoop:hadoop-client") {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
+
+ testCompile project(path: ':iceberg-api', configuration: 'testArtifacts')
}
test {
@@ -314,6 +189,8 @@ project(':iceberg-hive') {
exclude group: 'org.apache.avro', module: 'avro'
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
+
+ testCompile project(path: ':iceberg-api', configuration: 'testArtifacts')
}
}
@@ -359,6 +236,17 @@ project(':iceberg-parquet') {
}
}
+project(':iceberg-arrow') {
+ dependencies {
+ // compile project(':iceberg-spark')
+ compile project(':iceberg-api')
+
+ compileOnly("org.apache.spark:spark-hive_2.11") {
+ exclude group: 'org.apache.avro', module: 'avro'
+ }
+ }
+}
+
project(':iceberg-spark') {
apply plugin: 'scala'
@@ -381,17 +269,7 @@ project(':iceberg-spark') {
exclude group: 'org.apache.avro', module: 'avro'
}
testCompile project(path: ':iceberg-hive', configuration: 'testArtifacts')
- }
-}
-
-project(':iceberg-arrow') {
- dependencies {
-// compile project(':iceberg-spark')
- compile project(':iceberg-api')
-
- compileOnly("org.apache.spark:spark-hive_2.11") {
- exclude group: 'org.apache.avro', module: 'avro'
- }
+ testCompile project(path: ':iceberg-api', configuration: 'testArtifacts')
}
}
@@ -402,6 +280,8 @@ project(':iceberg-pig') {
compile project(':iceberg-core')
compile project(':iceberg-parquet')
+ compile "org.apache.commons:commons-lang3"
+
compileOnly("org.apache.pig:pig") {
exclude group: "junit", module: "junit"
}
@@ -422,12 +302,12 @@ project(':iceberg-pig') {
project(':iceberg-spark-runtime') {
apply plugin: 'com.github.johnrengelman.shadow'
- tasks.build.dependsOn tasks.shadowJar
+ tasks.assemble.dependsOn tasks.shadowJar
tasks.install.dependsOn tasks.shadowJar
tasks.javadocJar.dependsOn tasks.shadowJar
configurations {
- compile {
+ compileOnly {
// included in Spark
exclude group: 'org.slf4j'
exclude group: 'org.apache.commons'
@@ -448,6 +328,12 @@ project(':iceberg-spark-runtime') {
zip64 true
+ // include the LICENSE and NOTICE files for the shaded Jar
+ from(projectDir) {
+ include 'LICENSE'
+ include 'NOTICE'
+ }
+
// Relocate dependencies to avoid conflicts
relocate 'com.google', 'org.apache.iceberg.shaded.com.google'
relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml'
@@ -467,3 +353,37 @@ project(':iceberg-spark-runtime') {
}
}
+@Memoized
+boolean isVersionFileExists() {
+ return file('version.txt').exists()
+}
+
+@Memoized
+String getVersionFromFile() {
+ return file('version.txt').text.trim()
+}
+
+String getProjectVersion() {
+ if (isVersionFileExists()) {
+ return getVersionFromFile()
+ }
+
+ try {
+ return gitVersion()
+ } catch (NullPointerException e) {
+ throw new Exception("Neither version.txt nor git version exists")
+ }
+}
+
+String getJavadocVersion() {
+ if (isVersionFileExists()) {
+ return getVersionFromFile()
+ }
+
+ try {
+ // use the branch name in place of version in Javadoc
+ return versionDetails().branchName
+ } catch (NullPointerException e) {
+ throw new Exception("Neither version.txt nor git version exists")
+ }
+}
diff --git a/core/src/main/java/org/apache/iceberg/BaseCombinedScanTask.java b/core/src/main/java/org/apache/iceberg/BaseCombinedScanTask.java
index e8513ab..00f493f 100644
--- a/core/src/main/java/org/apache/iceberg/BaseCombinedScanTask.java
+++ b/core/src/main/java/org/apache/iceberg/BaseCombinedScanTask.java
@@ -19,6 +19,8 @@
package org.apache.iceberg;
+import com.google.common.base.Joiner;
+import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import java.util.Collection;
import java.util.List;
@@ -38,4 +40,11 @@ public class BaseCombinedScanTask implements CombinedScanTask {
public Collection<FileScanTask> files() {
return tasks;
}
+
+ @Override
+ public String toString() {
+ return MoreObjects.toStringHelper(this)
+ .add("tasks", Joiner.on(", ").join(tasks))
+ .toString();
+ }
}
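The new toString delegates to Guava's MoreObjects helper, so a combined task prints as one readable
line. A small sketch of the resulting shape, using placeholder task strings:

import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import java.util.Arrays;
import java.util.List;

public class ToStringShape {
  public static void main(String[] args) {
    List<String> tasks = Arrays.asList("scan-task-1", "scan-task-2"); // placeholders
    String rendered = MoreObjects.toStringHelper("BaseCombinedScanTask")
        .add("tasks", Joiner.on(", ").join(tasks))
        .toString();
    System.out.println(rendered); // BaseCombinedScanTask{tasks=scan-task-1, scan-task-2}
  }
}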
diff --git a/core/src/main/java/org/apache/iceberg/BaseMetadataTable.java b/core/src/main/java/org/apache/iceberg/BaseMetadataTable.java
index 4bebc13..0d6dd77 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetadataTable.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetadataTable.java
@@ -27,6 +27,8 @@ import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.LocationProvider;
abstract class BaseMetadataTable implements Table {
+ private PartitionSpec spec = PartitionSpec.unpartitioned();
+
abstract Table table();
abstract String metadataTableName();
@@ -52,7 +54,12 @@ abstract class BaseMetadataTable implements Table {
@Override
public PartitionSpec spec() {
- return PartitionSpec.unpartitioned();
+ return spec;
+ }
+
+ @Override
+ public Map<Integer, PartitionSpec> specs() {
+ return ImmutableMap.of(spec.specId(), spec);
}
@Override
diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
index 06d0a1e..0041231 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
@@ -19,38 +19,29 @@
package org.apache.iceberg;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.MapMaker;
import com.google.common.collect.Maps;
-import java.util.Locale;
+import com.google.common.collect.Sets;
+import java.io.IOException;
import java.util.Map;
-import org.apache.hadoop.conf.Configuration;
+import java.util.Set;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.iceberg.exceptions.RuntimeIOException;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.util.Tasks;
+import org.apache.iceberg.util.ThreadPools;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public abstract class BaseMetastoreCatalog implements Catalog {
- enum TableType {
- ENTRIES,
- FILES,
- HISTORY,
- SNAPSHOTS,
- MANIFESTS;
-
- static TableType from(String name) {
- try {
- return TableType.valueOf(name.toUpperCase(Locale.ROOT));
- } catch (IllegalArgumentException ignored) {
- return null;
- }
- }
- }
-
- private final Configuration conf;
-
- protected BaseMetastoreCatalog(Configuration conf) {
- this.conf = conf;
- }
+ private static final Logger LOG = LoggerFactory.getLogger(BaseMetastoreCatalog.class);
@Override
public Table createTable(
@@ -59,7 +50,9 @@ public abstract class BaseMetastoreCatalog implements Catalog {
PartitionSpec spec,
String location,
Map<String, String> properties) {
- TableOperations ops = newTableOps(conf, identifier);
+ Preconditions.checkArgument(isValidIdentifier(identifier), "Invalid table identifier: %s", identifier);
+
+ TableOperations ops = newTableOps(identifier);
if (ops.current() != null) {
throw new AlreadyExistsException("Table already exists: " + identifier);
}
@@ -68,62 +61,240 @@ public abstract class BaseMetastoreCatalog implements Catalog {
if (location != null) {
baseLocation = location;
} else {
- baseLocation = defaultWarehouseLocation(conf, identifier);
+ baseLocation = defaultWarehouseLocation(identifier);
}
TableMetadata metadata = TableMetadata.newTableMetadata(
- ops, schema, spec, baseLocation, properties == null ? Maps.newHashMap() : properties);
+ schema, spec, baseLocation, properties == null ? Maps.newHashMap() : properties);
ops.commit(null, metadata);
try {
- return new BaseTable(ops, identifier.toString());
+ return new BaseTable(ops, fullTableName(name(), identifier));
} catch (CommitFailedException ignored) {
throw new AlreadyExistsException("Table was created concurrently: " + identifier);
}
}
@Override
+ public Transaction newCreateTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ String location,
+ Map<String, String> properties) {
+ Preconditions.checkArgument(isValidIdentifier(identifier), "Invalid table identifier: %s", identifier);
+
+ TableOperations ops = newTableOps(identifier);
+ if (ops.current() != null) {
+ throw new AlreadyExistsException("Table already exists: " + identifier);
+ }
+
+ String baseLocation = location != null ? location : defaultWarehouseLocation(identifier);
+ Map<String, String> tableProperties = properties != null ? properties : Maps.newHashMap();
+ TableMetadata metadata = TableMetadata.newTableMetadata(schema, spec, baseLocation, tableProperties);
+ return Transactions.createTableTransaction(ops, metadata);
+ }
+
+ @Override
+ public Transaction newReplaceTableTransaction(
+ TableIdentifier identifier,
+ Schema schema,
+ PartitionSpec spec,
+ String location,
+ Map<String, String> properties,
+ boolean orCreate) {
+
+ TableOperations ops = newTableOps(identifier);
+ if (!orCreate && ops.current() == null) {
+ throw new NoSuchTableException("No such table: " + identifier);
+ }
+
+ String baseLocation = location != null ? location : defaultWarehouseLocation(identifier);
+ Map<String, String> tableProperties = properties != null ? properties : Maps.newHashMap();
+ TableMetadata metadata = TableMetadata.newTableMetadata(schema, spec, baseLocation, tableProperties);
+ if (orCreate) {
+ return Transactions.createOrReplaceTableTransaction(ops, metadata);
+ } else {
+ return Transactions.replaceTableTransaction(ops, metadata);
+ }
+ }
+
+ @Override
public Table loadTable(TableIdentifier identifier) {
- TableOperations ops = newTableOps(conf, identifier);
- if (ops.current() == null) {
- String name = identifier.name();
- TableType type = TableType.from(name);
- if (type != null) {
- return loadMetadataTable(TableIdentifier.of(identifier.namespace().levels()), type);
- } else {
- throw new NoSuchTableException("Table does not exist: " + identifier);
+ if (isValidIdentifier(identifier)) {
+ TableOperations ops = newTableOps(identifier);
+ if (ops.current() == null) {
+ // the identifier may be valid for both tables and metadata tables
+ if (isValidMetadataIdentifier(identifier)) {
+ return loadMetadataTable(identifier);
+ }
+
+ throw new NoSuchTableException("Table does not exist: %s", identifier);
}
- }
- return new BaseTable(ops, identifier.toString());
+ return new BaseTable(ops, fullTableName(name(), identifier));
+
+ } else if (isValidMetadataIdentifier(identifier)) {
+ return loadMetadataTable(identifier);
+
+ } else {
+ throw new NoSuchTableException("Invalid table identifier: %s", identifier);
+ }
}
- private Table loadMetadataTable(TableIdentifier identifier, TableType type) {
- TableOperations ops = newTableOps(conf, identifier);
- if (ops.current() == null) {
+ private Table loadMetadataTable(TableIdentifier identifier) {
+ String name = identifier.name();
+ MetadataTableType type = MetadataTableType.from(name);
+ if (type != null) {
+ TableIdentifier baseTableIdentifier = TableIdentifier.of(identifier.namespace().levels());
+ TableOperations ops = newTableOps(baseTableIdentifier);
+ if (ops.current() == null) {
+ throw new NoSuchTableException("Table does not exist: " + baseTableIdentifier);
+ }
+
+ Table baseTable = new BaseTable(ops, fullTableName(name(), identifier));
+
+ switch (type) {
+ case ENTRIES:
+ return new ManifestEntriesTable(ops, baseTable);
+ case FILES:
+ return new DataFilesTable(ops, baseTable);
+ case HISTORY:
+ return new HistoryTable(ops, baseTable);
+ case SNAPSHOTS:
+ return new SnapshotsTable(ops, baseTable);
+ case MANIFESTS:
+ return new ManifestsTable(ops, baseTable);
+ case PARTITIONS:
+ return new PartitionsTable(ops, baseTable);
+ default:
+ throw new NoSuchTableException("Unknown metadata table type: %s for %s", type, baseTableIdentifier);
+ }
+
+ } else {
throw new NoSuchTableException("Table does not exist: " + identifier);
}
+ }
+
+ private boolean isValidMetadataIdentifier(TableIdentifier identifier) {
+ return MetadataTableType.from(identifier.name()) != null &&
+ isValidIdentifier(TableIdentifier.of(identifier.namespace().levels()));
+ }
+
+ protected boolean isValidIdentifier(TableIdentifier tableIdentifier) {
+ // by default allow all identifiers
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName() + "(" + name() + ")";
+ }
+
+ protected abstract String name();
+
+ protected abstract TableOperations newTableOps(TableIdentifier tableIdentifier);
+
+ protected abstract String defaultWarehouseLocation(TableIdentifier tableIdentifier);
+
+ /**
+ * Drops all data and metadata files referenced by TableMetadata.
+ * <p>
+ * This should be called by dropTable implementations to clean up table files once the table has been dropped in the
+ * metastore.
+ *
+ * @param io a FileIO to use for deletes
+ * @param metadata the last valid TableMetadata instance for a dropped table.
+ */
+ protected static void dropTableData(FileIO io, TableMetadata metadata) {
+ // Reads and deletes are done using Tasks.foreach(...).suppressFailureWhenFinished to complete
+ // as much of the delete work as possible and avoid orphaned data or manifest files.
- Table baseTable = new BaseTable(ops, identifier.toString());
-
- switch (type) {
- case ENTRIES:
- return new ManifestEntriesTable(ops, baseTable);
- case FILES:
- return new DataFilesTable(ops, baseTable);
- case HISTORY:
- return new HistoryTable(ops, baseTable);
- case SNAPSHOTS:
- return new SnapshotsTable(ops, baseTable);
- case MANIFESTS:
- return new ManifestsTable(ops, baseTable);
- default:
- throw new NoSuchTableException(String.format("Unknown metadata table type: %s for %s", type, identifier));
+ Set<String> manifestListsToDelete = Sets.newHashSet();
+ Set<ManifestFile> manifestsToDelete = Sets.newHashSet();
+ for (Snapshot snapshot : metadata.snapshots()) {
+ manifestsToDelete.addAll(snapshot.manifests());
+ // add the manifest list to the delete set, if present
+ if (snapshot.manifestListLocation() != null) {
+ manifestListsToDelete.add(snapshot.manifestListLocation());
+ }
}
+
+ LOG.info("Manifests to delete: {}", Joiner.on(", ").join(manifestsToDelete));
+
+ // run all of the deletes
+
+ deleteFiles(io, manifestsToDelete);
+
+ Tasks.foreach(Iterables.transform(manifestsToDelete, ManifestFile::path))
+ .noRetry().suppressFailureWhenFinished()
+ .onFailure((manifest, exc) -> LOG.warn("Delete failed for manifest: {}", manifest, exc))
+ .run(io::deleteFile);
+
+ Tasks.foreach(manifestListsToDelete)
+ .noRetry().suppressFailureWhenFinished()
+ .onFailure((list, exc) -> LOG.warn("Delete failed for manifest list: {}", list, exc))
+ .run(io::deleteFile);
+
+ Tasks.foreach(metadata.file().location())
+ .noRetry().suppressFailureWhenFinished()
+ .onFailure((list, exc) -> LOG.warn("Delete failed for metadata file: {}", list, exc))
+ .run(io::deleteFile);
+ }
+
+ private static void deleteFiles(FileIO io, Set<ManifestFile> allManifests) {
+ // keep track of deleted files in a map that can be cleaned up when memory runs low
+ Map<String, Boolean> deletedFiles = new MapMaker()
+ .concurrencyLevel(ThreadPools.WORKER_THREAD_POOL_SIZE)
+ .weakKeys()
+ .makeMap();
+
+ Tasks.foreach(allManifests)
+ .noRetry().suppressFailureWhenFinished()
+ .executeWith(ThreadPools.getWorkerPool())
+ .onFailure((item, exc) -> LOG.warn("Failed to get deleted files: this may cause orphaned data files", exc))
+ .run(manifest -> {
+ try (ManifestReader reader = ManifestReader.read(io.newInputFile(manifest.path()))) {
+ for (ManifestEntry entry : reader.entries()) {
+ // intern the file path because the weak key map uses identity (==) instead of equals
+ String path = entry.file().path().toString().intern();
+ Boolean alreadyDeleted = deletedFiles.putIfAbsent(path, true);
+ if (alreadyDeleted == null || !alreadyDeleted) {
+ try {
+ io.deleteFile(path);
+ } catch (RuntimeException e) {
+ // this may happen if the map of deleted files gets cleaned up by gc
+ LOG.warn("Delete failed for data file: {}", path, e);
+ }
+ }
+ }
+ } catch (IOException e) {
+ throw new RuntimeIOException(e, "Failed to read manifest file: " + manifest.path());
+ }
+ });
}
- protected abstract TableOperations newTableOps(Configuration newConf, TableIdentifier tableIdentifier);
+ private static String fullTableName(String catalogName, TableIdentifier identifier) {
+ StringBuilder sb = new StringBuilder();
+
+ if (catalogName.contains("/") || catalogName.contains(":")) {
+ // use / for URI-like names: thrift://host:port/db.table
+ sb.append(catalogName);
+ if (!catalogName.endsWith("/")) {
+ sb.append("/");
+ }
+ } else {
+ // use . for non-URI named catalogs: prod.db.table
+ sb.append(catalogName).append(".");
+ }
- protected abstract String defaultWarehouseLocation(Configuration hadoopConf, TableIdentifier tableIdentifier);
+ for (String level : identifier.namespace().levels()) {
+ sb.append(level).append(".");
+ }
+
+ sb.append(identifier.name());
+
+ return sb.toString();
+ }
}
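The interning in deleteFiles deserves a note: MapMaker().weakKeys() switches the map to identity
(==) comparison, so an equal-but-distinct path string would not be seen as already deleted. A
minimal sketch of that behavior, assuming only Guava; the path is a hypothetical placeholder:

import com.google.common.collect.MapMaker;
import java.util.Map;

public class WeakKeyIdentity {
  public static void main(String[] args) {
    // weak-key maps compare keys by identity (==), not equals()
    Map<String, Boolean> deleted = new MapMaker().weakKeys().makeMap();

    String first = new String("s3://bucket/data/f1.parquet");  // hypothetical path
    String second = new String("s3://bucket/data/f1.parquet"); // equal, but a distinct instance

    deleted.put(first.intern(), true);

    System.out.println(deleted.containsKey(second));          // false: identity miss
    System.out.println(deleted.containsKey(second.intern())); // true: canonical instance
  }
}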
diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
index f270eaa..1fbbd33 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
@@ -21,10 +21,14 @@ package org.apache.iceberg;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.iceberg.hadoop.HadoopFileIO;
+import java.util.function.Predicate;
+import org.apache.iceberg.encryption.EncryptionManager;
+import org.apache.iceberg.exceptions.CommitFailedException;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.LocationProvider;
import org.apache.iceberg.io.OutputFile;
@@ -43,18 +47,12 @@ public abstract class BaseMetastoreTableOperations implements TableOperations {
private static final String METADATA_FOLDER_NAME = "metadata";
private static final String DATA_FOLDER_NAME = "data";
- private final Configuration conf;
- private final FileIO fileIo;
-
private TableMetadata currentMetadata = null;
private String currentMetadataLocation = null;
private boolean shouldRefresh = true;
private int version = -1;
- protected BaseMetastoreTableOperations(Configuration conf) {
- this.conf = conf;
- this.fileIo = new HadoopFileIO(conf);
- }
+ protected BaseMetastoreTableOperations() { }
@Override
public TableMetadata current() {
@@ -72,25 +70,72 @@ public abstract class BaseMetastoreTableOperations implements TableOperations {
return version;
}
+ @Override
+ public TableMetadata refresh() {
+ try {
+ doRefresh();
+ } catch (NoSuchTableException e) {
+ LOG.warn("Could not find the table during refresh, setting current metadata to null", e);
+ currentMetadata = null;
+ currentMetadataLocation = null;
+ version = -1;
+ shouldRefresh = false;
+ throw e;
+ }
+ return current();
+ }
+
+ protected void doRefresh() {
+ throw new UnsupportedOperationException("Not implemented: doRefresh");
+ }
+
+ @Override
+ public void commit(TableMetadata base, TableMetadata metadata) {
+ // if the metadata is already out of date, reject it
+ if (base != current()) {
+ throw new CommitFailedException("Cannot commit: stale table metadata");
+ }
+ // if the metadata is not changed, return early
+ if (base == metadata) {
+ LOG.info("Nothing to commit.");
+ return;
+ }
+
+ doCommit(base, metadata);
+ deleteRemovedMetadataFiles(base, metadata);
+ requestRefresh();
+ }
+
+ protected void doCommit(TableMetadata base, TableMetadata metadata) {
+ throw new UnsupportedOperationException("Not implemented: doCommit");
+ }
+
protected void requestRefresh() {
this.shouldRefresh = true;
}
protected String writeNewMetadata(TableMetadata metadata, int newVersion) {
String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
- OutputFile newMetadataLocation = fileIo.newOutputFile(newTableMetadataFilePath);
+ OutputFile newMetadataLocation = io().newOutputFile(newTableMetadataFilePath);
// write the new metadata
- TableMetadataParser.write(metadata, newMetadataLocation);
+ // use overwrite to avoid negative caching in S3; this is safe because the metadata location is
+ // always unique, as it includes a UUID.
+ TableMetadataParser.overwrite(metadata, newMetadataLocation);
- return newTableMetadataFilePath;
+ return newMetadataLocation.location();
}
protected void refreshFromMetadataLocation(String newLocation) {
- refreshFromMetadataLocation(newLocation, 20);
+ refreshFromMetadataLocation(newLocation, null, 20);
}
protected void refreshFromMetadataLocation(String newLocation, int numRetries) {
+ refreshFromMetadataLocation(newLocation, null, numRetries);
+ }
+
+ protected void refreshFromMetadataLocation(String newLocation, Predicate<Exception> shouldRetry,
+ int numRetries) {
// use null-safe equality check because new tables have a null metadata location
if (!Objects.equal(currentMetadataLocation, newLocation)) {
LOG.info("Refreshing table metadata from new version: {}", newLocation);
@@ -98,9 +143,10 @@ public abstract class BaseMetastoreTableOperations implements TableOperations {
AtomicReference<TableMetadata> newMetadata = new AtomicReference<>();
Tasks.foreach(newLocation)
.retry(numRetries).exponentialBackoff(100, 5000, 600000, 4.0 /* 100, 400, 1600, ... */)
- .suppressFailureWhenFinished()
+ .throwFailureWhenFinished()
+ .shouldRetryTest(shouldRetry)
.run(metadataLocation -> newMetadata.set(
- TableMetadataParser.read(this, io().newInputFile(metadataLocation))));
+ TableMetadataParser.read(io(), io().newInputFile(metadataLocation))));
String newUUID = newMetadata.get().uuid();
if (currentMetadata != null) {
@@ -132,13 +178,53 @@ public abstract class BaseMetastoreTableOperations implements TableOperations {
}
@Override
- public FileIO io() {
- return fileIo;
+ public LocationProvider locationProvider() {
+ return LocationProviders.locationsFor(current().location(), current().properties());
}
@Override
- public LocationProvider locationProvider() {
- return LocationProviders.locationsFor(current().location(), current().properties());
+ public TableOperations temp(TableMetadata uncommittedMetadata) {
+ return new TableOperations() {
+ @Override
+ public TableMetadata current() {
+ return uncommittedMetadata;
+ }
+
+ @Override
+ public TableMetadata refresh() {
+ throw new UnsupportedOperationException("Cannot call refresh on temporary table operations");
+ }
+
+ @Override
+ public void commit(TableMetadata base, TableMetadata metadata) {
+ throw new UnsupportedOperationException("Cannot call commit on temporary table operations");
+ }
+
+ @Override
+ public String metadataFileLocation(String fileName) {
+ return BaseMetastoreTableOperations.this.metadataFileLocation(uncommittedMetadata, fileName);
+ }
+
+ @Override
+ public LocationProvider locationProvider() {
+ return LocationProviders.locationsFor(uncommittedMetadata.location(), uncommittedMetadata.properties());
+ }
+
+ @Override
+ public FileIO io() {
+ return BaseMetastoreTableOperations.this.io();
+ }
+
+ @Override
+ public EncryptionManager encryption() {
+ return BaseMetastoreTableOperations.this.encryption();
+ }
+
+ @Override
+ public long newSnapshotId() {
+ return BaseMetastoreTableOperations.this.newSnapshotId();
+ }
+ };
}
private String newTableMetadataFilePath(TableMetadata meta, int newVersion) {
@@ -158,4 +244,31 @@ public abstract class BaseMetastoreTableOperations implements TableOperations {
return -1;
}
}
+
+ /**
+ * Deletes the oldest metadata files if {@link TableProperties#METADATA_DELETE_AFTER_COMMIT_ENABLED} is true.
+ *
+ * @param base table metadata on which previous versions were based
+ * @param metadata new table metadata with updated previous versions
+ */
+ private void deleteRemovedMetadataFiles(TableMetadata base, TableMetadata metadata) {
+ if (base == null) {
+ return;
+ }
+
+ boolean deleteAfterCommit = metadata.propertyAsBoolean(
+ TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
+ TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT);
+
+ Set<TableMetadata.MetadataLogEntry> removedPreviousMetadataFiles = Sets.newHashSet(base.previousFiles());
+ removedPreviousMetadataFiles.removeAll(metadata.previousFiles());
+
+ if (deleteAfterCommit) {
+ Tasks.foreach(removedPreviousMetadataFiles)
+ .noRetry().suppressFailureWhenFinished()
+ .onFailure((previousMetadataFile, exc) ->
+ LOG.warn("Delete failed for previous metadata file: {}", previousMetadataFile, exc))
+ .run(previousMetadataFile -> io().deleteFile(previousMetadataFile.file()));
+ }
+ }
}
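refresh() and commit() are now template methods: the base class handles staleness checks,
metadata-file cleanup, and refresh bookkeeping, while subclasses implement only doRefresh and
doCommit. A hedged sketch of a subclass; MyStore and its calls are hypothetical placeholders for a
metastore client, and currentVersion() is assumed to be exposed by the base class:

import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.io.FileIO;

public class MyTableOperations extends BaseMetastoreTableOperations {
  interface MyStore { // hypothetical metastore client
    String fetchMetadataLocation(String table);
    void swapMetadataLocation(String table, String newLocation);
  }

  private final MyStore store;
  private final String tableName;
  private final FileIO fileIo; // catalog-provided FileIO

  MyTableOperations(MyStore store, String tableName, FileIO fileIo) {
    this.store = store;
    this.tableName = tableName;
    this.fileIo = fileIo;
  }

  @Override
  protected void doRefresh() {
    // look up the current pointer; a missing table should surface as NoSuchTableException
    refreshFromMetadataLocation(store.fetchMetadataLocation(tableName));
  }

  @Override
  protected void doCommit(TableMetadata base, TableMetadata metadata) {
    String newLocation = writeNewMetadata(metadata, currentVersion() + 1);
    // the swap must be atomic; a lost race should throw CommitFailedException
    store.swapMetadataLocation(tableName, newLocation);
  }

  @Override
  public FileIO io() {
    return fileIo;
  }
}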
diff --git a/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
new file mode 100644
index 0000000..6f21a87
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.expressions.Evaluator;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.InclusiveMetricsEvaluator;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.StrictMetricsEvaluator;
+
+public class BaseOverwriteFiles extends MergingSnapshotProducer<OverwriteFiles> implements OverwriteFiles {
+ private boolean validateAddedFilesMatchOverwriteFilter = false;
+ private Long readSnapshotId = null;
+ private Expression conflictDetectionFilter = null;
+
+ protected BaseOverwriteFiles(TableOperations ops) {
+ super(ops);
+ }
+
+ @Override
+ protected OverwriteFiles self() {
+ return this;
+ }
+
+ @Override
+ protected String operation() {
+ return DataOperations.OVERWRITE;
+ }
+
+ @Override
+ public OverwriteFiles overwriteByRowFilter(Expression expr) {
+ deleteByRowFilter(expr);
+ return this;
+ }
+
+ @Override
+ public OverwriteFiles addFile(DataFile file) {
+ add(file);
+ return this;
+ }
+
+ @Override
+ public OverwriteFiles deleteFile(DataFile file) {
+ delete(file);
+ return this;
+ }
+
+ @Override
+ public OverwriteFiles validateAddedFilesMatchOverwriteFilter() {
+ this.validateAddedFilesMatchOverwriteFilter = true;
+ return this;
+ }
+
+ @Override
+ public OverwriteFiles validateNoConflictingAppends(Long newReadSnapshotId, Expression newConflictDetectionFilter) {
+ Preconditions.checkArgument(newConflictDetectionFilter != null, "Conflict detection filter cannot be null");
+ this.readSnapshotId = newReadSnapshotId;
+ this.conflictDetectionFilter = newConflictDetectionFilter;
+ failMissingDeletePaths();
+ return this;
+ }
+
+ @Override
+ public List<ManifestFile> apply(TableMetadata base) {
+ if (validateAddedFilesMatchOverwriteFilter) {
+ PartitionSpec spec = writeSpec();
+ Expression rowFilter = rowFilter();
+
+ Expression inclusiveExpr = Projections.inclusive(spec).project(rowFilter);
+ Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr);
+
+ Expression strictExpr = Projections.strict(spec).project(rowFilter);
+ Evaluator strict = new Evaluator(spec.partitionType(), strictExpr);
+
+ StrictMetricsEvaluator metrics = new StrictMetricsEvaluator(
+ base.schema(), rowFilter);
+
+ for (DataFile file : addedFiles()) {
+ // the real check is that the strict projection or the metrics evaluator matches the file,
+ // indicating that all records in the file match the filter; inclusive is evaluated first
+ // to short-circuit the metrics test, which is more complicated
+ ValidationException.check(
+ inclusive.eval(file.partition()) &&
+ (strict.eval(file.partition()) || metrics.eval(file)),
+ "Cannot append file with rows that do not match filter: %s: %s",
+ rowFilter, file.path());
+ }
+ }
+
+ if (conflictDetectionFilter != null) {
+ PartitionSpec spec = writeSpec();
+ Expression inclusiveExpr = Projections.inclusive(spec).project(conflictDetectionFilter);
+ Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr);
+
+ InclusiveMetricsEvaluator metrics = new InclusiveMetricsEvaluator(base.schema(), conflictDetectionFilter);
+
+ List<DataFile> newFiles = collectNewFiles(base);
+ for (DataFile newFile : newFiles) {
+ ValidationException.check(
+ !inclusive.eval(newFile.partition()) || !metrics.eval(newFile),
+ "A file was appended that might contain data matching filter '%s': %s",
+ conflictDetectionFilter, newFile.path());
+ }
+ }
+
+ return super.apply(base);
+ }
+
+ private List<DataFile> collectNewFiles(TableMetadata meta) {
+ List<DataFile> newFiles = new ArrayList<>();
+
+ Long currentSnapshotId = meta.currentSnapshot() == null ? null : meta.currentSnapshot().snapshotId();
+ while (currentSnapshotId != null && !currentSnapshotId.equals(readSnapshotId)) {
+ Snapshot currentSnapshot = meta.snapshot(currentSnapshotId);
+
+ if (currentSnapshot == null) {
+ throw new ValidationException(
+ "Cannot determine history between read snapshot %s and current %s",
+ readSnapshotId, currentSnapshotId);
+ }
+
+ Iterables.addAll(newFiles, currentSnapshot.addedFiles());
+ currentSnapshotId = currentSnapshot.parentId();
+ }
+
+ return newFiles;
+ }
+}
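Putting the two validations together from a caller's perspective: overwriteByRowFilter deletes
matching files, validateAddedFilesMatchOverwriteFilter rejects appended files with rows outside the
filter, and validateNoConflictingAppends fails the commit if a concurrent append since the read
snapshot may match. A hedged usage sketch; the 'date' column is a placeholder:

import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;

public class OverwriteUsage {
  // replace one day of data, failing on concurrent appends to the same day
  static void overwriteDay(Table table, DataFile replacement, long readSnapshotId) {
    Expression filter = Expressions.equal("date", "2019-11-25"); // placeholder column
    table.newOverwrite()
        .overwriteByRowFilter(filter)
        .addFile(replacement)
        .validateAddedFilesMatchOverwriteFilter()
        .validateNoConflictingAppends(readSnapshotId, filter)
        .commit();
  }
}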
diff --git a/core/src/main/java/org/apache/iceberg/BaseReplacePartitions.java b/core/src/main/java/org/apache/iceberg/BaseReplacePartitions.java
new file mode 100644
index 0000000..0bfbb62
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/BaseReplacePartitions.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import java.util.List;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.expressions.Expressions;
+
+public class BaseReplacePartitions
+ extends MergingSnapshotProducer<ReplacePartitions> implements ReplacePartitions {
+ BaseReplacePartitions(TableOperations ops) {
+ super(ops);
+ }
+
+ @Override
+ protected ReplacePartitions self() {
+ return this;
+ }
+
+ @Override
+ protected String operation() {
+ return DataOperations.OVERWRITE;
+ }
+
+ @Override
+ public ReplacePartitions addFile(DataFile file) {
+ dropPartition(file.partition());
+ add(file);
+ return this;
+ }
+
+ @Override
+ public ReplacePartitions validateAppendOnly() {
+ failAnyDelete();
+ return this;
+ }
+
+ @Override
+ public List<ManifestFile> apply(TableMetadata base) {
+ if (writeSpec().fields().size() <= 0) {
+ // replace all data in an unpartitioned table
+ deleteByRowFilter(Expressions.alwaysTrue());
+ }
+
+ try {
+ return super.apply(base);
+ } catch (DeleteException e) {
+ throw new ValidationException(
+ "Cannot commit file that conflicts with existing partition: %s", e.partition());
+ }
+ }
+}
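In practice this gives dynamic partition overwrite semantics: every file added through addFile
first drops the partition it belongs to, and on an unpartitioned table the whole table is replaced.
A hedged usage sketch:

import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;

public class ReplacePartitionsUsage {
  // each added file implicitly replaces the data in its own partition
  static void replaceTouchedPartitions(Table table, DataFile file) {
    table.newReplacePartitions()
        .addFile(file)
        .commit();
  }
}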
diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java
new file mode 100644
index 0000000..fd7ca4e
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import com.google.common.base.Preconditions;
+import java.util.Set;
+
+class BaseRewriteFiles extends MergingSnapshotProducer<RewriteFiles> implements RewriteFiles {
+ BaseRewriteFiles(TableOperations ops) {
+ super(ops);
+
+ // replace files must fail if any of the deleted paths is missing and cannot be deleted
+ failMissingDeletePaths();
+ }
+
+ @Override
+ protected RewriteFiles self() {
+ return this;
+ }
+
+ @Override
+ protected String operation() {
+ return DataOperations.REPLACE;
+ }
+
+ @Override
+ public RewriteFiles rewriteFiles(Set<DataFile> filesToDelete, Set<DataFile> filesToAdd) {
+ Preconditions.checkArgument(filesToDelete != null && !filesToDelete.isEmpty(),
+ "Files to delete cannot be null or empty");
+ Preconditions.checkArgument(filesToAdd != null && !filesToAdd.isEmpty(),
+ "Files to add can not be null or empty");
+
+ for (DataFile toDelete : filesToDelete) {
+ delete(toDelete);
+ }
+
+ for (DataFile toAdd : filesToAdd) {
+ add(toAdd);
+ }
+
+ return this;
+ }
+}
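RewriteFiles is the building block for compaction: the commit atomically swaps a set of existing
data files for their rewritten replacements, and failMissingDeletePaths() ensures the commit fails
if any file slated for deletion has already disappeared. A hedged sketch:

import com.google.common.collect.ImmutableSet;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;

public class CompactionSketch {
  // swap two small files for one merged file in a single atomic commit
  static void compact(Table table, DataFile small1, DataFile small2, DataFile merged) {
    table.newRewrite()
        .rewriteFiles(ImmutableSet.of(small1, small2), ImmutableSet.of(merged))
        .commit();
  }
}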
diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteManifests.java b/core/src/main/java/org/apache/iceberg/BaseRewriteManifests.java
new file mode 100644
index 0000000..4a084b9
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/BaseRewriteManifests.java
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import org.apache.iceberg.exceptions.RuntimeIOException;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.util.Tasks;
+import org.apache.iceberg.util.ThreadPools;
+
+import static org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES;
+import static org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT;
+
+
+public class BaseRewriteManifests extends SnapshotProducer<RewriteManifests> implements RewriteManifests {
+ private static final String KEPT_MANIFESTS_COUNT = "manifests-kept";
+ private static final String CREATED_MANIFESTS_COUNT = "manifests-created";
+ private static final String REPLACED_MANIFESTS_COUNT = "manifests-replaced";
+ private static final String PROCESSED_ENTRY_COUNT = "entries-processed";
+
+ private static final Set<ManifestEntry.Status> ALLOWED_ENTRY_STATUSES = ImmutableSet.of(
+ ManifestEntry.Status.EXISTING);
+
+ private final TableOperations ops;
+ private final PartitionSpec spec;
+ private final long manifestTargetSizeBytes;
+
+ private final Set<ManifestFile> deletedManifests = Sets.newHashSet();
+ private final List<ManifestFile> addedManifests = Lists.newArrayList();
+
+ private final List<ManifestFile> keptManifests = Collections.synchronizedList(new ArrayList<>());
+ private final List<ManifestFile> newManifests = Collections.synchronizedList(new ArrayList<>());
+ private final Set<ManifestFile> rewrittenManifests = Collections.synchronizedSet(new HashSet<>());
+ private final Map<Object, WriterWrapper> writers = Collections.synchronizedMap(new HashMap<>());
+
+ private final AtomicInteger manifestSuffix = new AtomicInteger(0);
+ private final AtomicLong entryCount = new AtomicLong(0);
+
+ private Function<DataFile, Object> clusterByFunc;
+ private Predicate<ManifestFile> predicate;
+
+ private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder();
+
+ BaseRewriteManifests(TableOperations ops) {
+ super(ops);
+ this.ops = ops;
+ this.spec = ops.current().spec();
+ this.manifestTargetSizeBytes =
+ ops.current().propertyAsLong(MANIFEST_TARGET_SIZE_BYTES, MANIFEST_TARGET_SIZE_BYTES_DEFAULT);
+ }
+
+ @Override
+ protected RewriteManifests self() {
+ return this;
+ }
+
+ @Override
+ protected String operation() {
+ return DataOperations.REPLACE;
+ }
+
+ @Override
+ public RewriteManifests set(String property, String value) {
+ summaryBuilder.set(property, value);
+ return this;
+ }
+
+ @Override
+ protected Map<String, String> summary() {
+ summaryBuilder.set(KEPT_MANIFESTS_COUNT, String.valueOf(keptManifests.size()));
+ summaryBuilder.set(CREATED_MANIFESTS_COUNT, String.valueOf(newManifests.size() + addedManifests.size()));
+ summaryBuilder.set(REPLACED_MANIFESTS_COUNT, String.valueOf(rewrittenManifests.size() + deletedManifests.size()));
+ summaryBuilder.set(PROCESSED_ENTRY_COUNT, String.valueOf(entryCount.get()));
+ return summaryBuilder.build();
+ }
+
+ @Override
+ public RewriteManifests clusterBy(Function<DataFile, Object> func) {
+ this.clusterByFunc = func;
+ return this;
+ }
+
+ @Override
+ public RewriteManifests rewriteIf(Predicate<ManifestFile> pred) {
+ this.predicate = pred;
+ return this;
+ }
+
+ @Override
+ public RewriteManifests deleteManifest(ManifestFile manifest) {
+ deletedManifests.add(manifest);
+ return this;
+ }
+
+ @Override
+ public RewriteManifests addManifest(ManifestFile manifest) {
+ try {
+ // the appended manifest must be rewritten with this update's snapshot ID
+ addedManifests.add(copyManifest(manifest));
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Cannot append manifest: " + e.getMessage());
+ }
+ return this;
+ }
+
+ private ManifestFile copyManifest(ManifestFile manifest) {
+ Map<Integer, PartitionSpec> specsById = ops.current().specsById();
+ try (ManifestReader reader = ManifestReader.read(ops.io().newInputFile(manifest.path()), specsById)) {
+ OutputFile newFile = manifestPath(manifestSuffix.getAndIncrement());
+ return ManifestWriter.copyManifest(reader, newFile, snapshotId(), summaryBuilder, ALLOWED_ENTRY_STATUSES);
+ } catch (IOException e) {
+ throw new RuntimeIOException(e, "Failed to close manifest: %s", manifest);
+ }
+ }
+
+ @Override
+ public List<ManifestFile> apply(TableMetadata base) {
+ List<ManifestFile> currentManifests = base.currentSnapshot().manifests();
+ Set<ManifestFile> currentManifestSet = ImmutableSet.copyOf(currentManifests);
+
+ validateDeletedManifests(currentManifestSet);
+
+ if (requiresRewrite(currentManifestSet)) {
+ performRewrite(currentManifests);
+ } else {
+ keepActiveManifests(currentManifests);
+ }
+
+ validateFilesCounts();
+
+ // put new manifests at the beginning
+ List<ManifestFile> apply = new ArrayList<>();
+ apply.addAll(newManifests);
+ apply.addAll(addedManifests);
+ apply.addAll(keptManifests);
+
+ return apply;
+ }
+
+ private boolean requiresRewrite(Set<ManifestFile> currentManifests) {
+ if (clusterByFunc == null) {
+ // manifests are deleted and added directly so don't perform a rewrite
+ return false;
+ }
+
+ if (rewrittenManifests.size() == 0) {
+ // nothing yet processed so perform a full rewrite
+ return true;
+ }
+
+ // if any processed manifest is not in the current manifest list, perform a full rewrite
+ return rewrittenManifests.stream().anyMatch(manifest -> !currentManifests.contains(manifest));
+ }
+
+ private void keepActiveManifests(List<ManifestFile> currentManifests) {
+ // keep any existing manifests as-is that were not processed
+ keptManifests.clear();
+ currentManifests.stream()
+ .filter(manifest -> !rewrittenManifests.contains(manifest) && !deletedManifests.contains(manifest))
+ .forEach(manifest -> keptManifests.add(manifest));
+ }
+
+ private void reset() {
+ cleanUncommitted(newManifests, ImmutableSet.of());
+ entryCount.set(0);
+ keptManifests.clear();
+ rewrittenManifests.clear();
+ newManifests.clear();
+ writers.clear();
+ }
+
+ private void performRewrite(List<ManifestFile> currentManifests) {
+ reset();
+
+ List<ManifestFile> remainingManifests = currentManifests.stream()
+ .filter(manifest -> !deletedManifests.contains(manifest))
+ .collect(Collectors.toList());
+
+ try {
+ Tasks.foreach(remainingManifests)
+ .executeWith(ThreadPools.getWorkerPool())
+ .run(manifest -> {
+ if (predicate != null && !predicate.test(manifest)) {
+ keptManifests.add(manifest);
+ } else {
+ rewrittenManifests.add(manifest);
+ try (ManifestReader reader =
+ ManifestReader.read(ops.io().newInputFile(manifest.path()), ops.current().specsById())) {
+ FilteredManifest filteredManifest = reader.select(Arrays.asList("*"));
+ filteredManifest.liveEntries().forEach(
+ entry -> appendEntry(entry, clusterByFunc.apply(entry.file()))
+ );
+
+ } catch (IOException x) {
+ throw new RuntimeIOException(x);
+ }
+ }
+ });
+ } finally {
+ Tasks.foreach(writers.values()).executeWith(ThreadPools.getWorkerPool()).run(writer -> writer.close());
+ }
+ }
+
+ private void validateDeletedManifests(Set<ManifestFile> currentManifests) {
+ // directly deleted manifests must be still present in the current snapshot
+ deletedManifests.stream()
+ .filter(manifest -> !currentManifests.contains(manifest))
+ .findAny()
+ .ifPresent(manifest -> {
+ throw new ValidationException("Manifest is missing: %s", manifest.path());
+ });
+ }
+
+ private void validateFilesCounts() {
+ int createdManifestsFilesCount = activeFilesCount(newManifests) + activeFilesCount(addedManifests);
+ int replacedManifestsFilesCount = activeFilesCount(rewrittenManifests) + activeFilesCount(deletedManifests);
+
+ if (createdManifestsFilesCount != replacedManifestsFilesCount) {
+ throw new ValidationException(
+ "Replaced and created manifests must have the same number of active files: %d (new), %d (old)",
+ createdManifestsFilesCount, replacedManifestsFilesCount);
+ }
+ }
+
+ private int activeFilesCount(Iterable<ManifestFile> manifests) {
+ int activeFilesCount = 0;
+
+ for (ManifestFile manifest : manifests) {
+ Preconditions.checkNotNull(manifest.addedFilesCount(), "Missing file counts in %s", manifest.path());
+ Preconditions.checkNotNull(manifest.existingFilesCount(), "Missing file counts in %s", manifest.path());
+ activeFilesCount += manifest.addedFilesCount();
+ activeFilesCount += manifest.existingFilesCount();
+ }
+
+ return activeFilesCount;
+ }
+
+ private void appendEntry(ManifestEntry entry, Object key) {
+ Preconditions.checkNotNull(entry, "Manifest entry cannot be null");
+ Preconditions.checkNotNull(key, "Key cannot be null");
+
+ WriterWrapper writer = getWriter(key);
+ writer.addEntry(entry);
+ entryCount.incrementAndGet();
+ }
+
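+ // lazily creates one writer per cluster key; double-checked locking keeps the
+ // common lookup path lock-free (this assumes writers is a concurrent map)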
+ private WriterWrapper getWriter(Object key) {
+ WriterWrapper writer = writers.get(key);
+ if (writer == null) {
+ synchronized (writers) {
+ writer = writers.get(key); // check again after acquiring the lock
+ if (writer == null) {
+ writer = new WriterWrapper();
+ writers.put(key, writer);
+ }
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ protected void cleanUncommitted(Set<ManifestFile> committed) {
+ cleanUncommitted(newManifests, committed);
+ cleanUncommitted(addedManifests, committed);
+ }
+
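+ // delete any manifest written by this operation that did not end up in the
+ // committed snapshot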
+ private void cleanUncommitted(Iterable<ManifestFile> manifests, Set<ManifestFile> committedManifests) {
+ for (ManifestFile manifest : manifests) {
+ if (!committedManifests.contains(manifest)) {
+ deleteFile(manifest.path());
+ }
+ }
+ }
+
+ long getManifestTargetSizeBytes() {
+ return manifestTargetSizeBytes;
+ }
+
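+ // thread-safe wrapper around a ManifestWriter that rolls over to a new output
+ // file once the current one reaches the target size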
+ class WriterWrapper {
+ private ManifestWriter writer;
+
+ synchronized void addEntry(ManifestEntry entry) {
+ if (writer == null) {
+ writer = newWriter();
+ } else if (writer.length() >= getManifestTargetSizeBytes()) {
+ close();
+ writer = newWriter();
+ }
+ writer.existing(entry);
+ }
+
+ private ManifestWriter newWriter() {
+ return new ManifestWriter(spec, manifestPath(manifestSuffix.getAndIncrement()), snapshotId());
+ }
+
+ synchronized void close() {
+ if (writer != null) {
+ try {
+ writer.close();
+ newManifests.add(writer.toManifestFile());
+ } catch (IOException x) {
+ throw new RuntimeIOException(x);
+ }
+ }
+ }
+
+ }
+
+}
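
For context, callers drive the rewrite machinery above through the table-level
RewriteManifests API rather than through this class directly. A minimal sketch,
assuming a table whose first partition field is an Integer and a hypothetical
8 MB size threshold; the table location, partition layout, and threshold are
illustrative choices, not part of this commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.hadoop.HadoopTables;

    // load an existing table (placeholder location)
    Table table = new HadoopTables(new Configuration()).load("hdfs://nn/warehouse/db/t");
    table.rewriteManifests()
        // cluster data files so entries for the same partition value share a manifest
        .clusterBy(file -> file.partition().get(0, Integer.class))
        // keep manifests that are already large enough (8 MB is an assumption)
        .rewriteIf(manifest -> manifest.length() < 8 * 1024 * 1024)
        .commit();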
diff --git a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java
index c2e59af..9632623 100644
--- a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java
+++ b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java
@@ -20,6 +20,8 @@
package org.apache.iceberg;
import com.google.common.base.MoreObjects;
+import com.google.common.base.Objects;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.io.IOException;
@@ -29,10 +31,11 @@ import java.util.Map;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.InputFile;
class BaseSnapshot implements Snapshot {
- private final TableOperations ops;
+ private final FileIO io;
private final long snapshotId;
private final Long parentId;
private final long timestampMillis;
@@ -48,22 +51,22 @@ class BaseSnapshot implements Snapshot {
/**
* For testing only.
*/
- BaseSnapshot(TableOperations ops,
+ BaseSnapshot(FileIO io,
long snapshotId,
String... manifestFiles) {
- this(ops, snapshotId, null, System.currentTimeMillis(), null, null,
+ this(io, snapshotId, null, System.currentTimeMillis(), null, null,
Lists.transform(Arrays.asList(manifestFiles),
- path -> new GenericManifestFile(ops.io().newInputFile(path), 0)));
+ path -> new GenericManifestFile(io.newInputFile(path), 0)));
}
- BaseSnapshot(TableOperations ops,
+ BaseSnapshot(FileIO io,
long snapshotId,
Long parentId,
long timestampMillis,
String operation,
Map<String, String> summary,
InputFile manifestList) {
- this.ops = ops;
+ this.io = io;
this.snapshotId = snapshotId;
this.parentId = parentId;
this.timestampMillis = timestampMillis;
@@ -72,14 +75,14 @@ class BaseSnapshot implements Snapshot {
this.manifestList = manifestList;
}
- BaseSnapshot(TableOperations ops,
+ BaseSnapshot(FileIO io,
long snapshotId,
Long parentId,
long timestampMillis,
String operation,
Map<String, String> summary,
List<ManifestFile> manifests) {
- this(ops, snapshotId, parentId, timestampMillis, operation, summary, (InputFile) null);
+ this(io, snapshotId, parentId, timestampMillis, operation, summary, (InputFile) null);
this.manifests = manifests;
}
@@ -152,32 +155,35 @@ class BaseSnapshot implements Snapshot {
}
private void cacheChanges() {
- List<DataFile> adds = Lists.newArrayList();
- List<DataFile> deletes = Lists.newArrayList();
-
- // accumulate adds and deletes from all manifests.
- // because manifests can be reused in newer snapshots, filter the changes by snapshot id.
- for (String manifest : Iterables.transform(manifests(), ManifestFile::path)) {
- try (ManifestReader reader = ManifestReader.read(
- ops.io().newInputFile(manifest),
... 177843 lines suppressed ...