You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/01/09 14:32:02 UTC
[parquet-mr] branch bloom-filter updated (ba28686 -> 1e44aa4)
This is an automated email from the ASF dual-hosted git repository.
gabor pushed a change to branch bloom-filter
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git.
from ba28686 PARQUET-1660: align Bloom filter implementation with format (#686)
add e0ab7af PARQUET-1654: Remove unnecessary options when building thrift (#676)
add e10a645 PARQUET-1649: Bump Jackson Databind to 2.9.9.3 (#674)
add e9d8716 PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)
add 56f164f PARQUET-1661: Upgrade to Avro 1.9.1 (#682)
add 76c40e7 PARQUET-1542: Merge multiple I/O to one time I/O in method readFooter (#624)
add 0cb5ead PARQUET-1662: Upgrade Jackson to version 2.9.10 (#683)
add 6e072cf PARQUET-1665: Upgrade zstd-jni to 1.4.0-1 (#684)
add 7c4d1ec PARQUET-1644: Clean up some benchmark code and docs. (#672)
add 600ffba PARQUET-1669: Disable compiling all libraries when building thrift (#688)
add 6db7287 PARQUET-1671: Upgrade Yetus to 0.11.0 (#689)
add 7772644 PARQUET-1673: Upgrade parquet-mr format version to 2.7.0 (#690)
add 59ae034 PARQUET-1578: Introduce Lambdas (#641)
add 10f57a3 PARQUET-1596: PARQUET-1375 broke parquet-cli's to-avro command (#648)
add 52a502e PARQUET-0000: Fix typo (#666)
add 57bd243 PARQUET-0000: Improved formatting (#673)
add 0c6a650 PARQUET-1650: Implement unit test to validate column/offset indexes (#675)
add 2117abc PARQUET-1682: Maintain forward compatibility for TIME/TIMESTAMP (#694)
add 2122a8a PARQUET-1683: Remove unnecessary string conversions (#695)
add 4648b06 PARQUET-XXXX: Minor Javadoc improvements (#667)
add 10b926f PARQUET-1444: Prefer ArrayList over LinkedList (#583)
add ca7d0e2 PARQUET-1496: Update Scala to 2.12 (#693)
add 19b10ac PARQUET-1499: Add Java 11 to Travis (#596)
add d1190ab PARQUET-1691: Build fails due to missing hadoop-lzo (#698)
add e60f5f1 PARQUET-1687: Update release process (#697)
add 76f9010 PARQUET-1685: Truncate Min/Max for Statistics (#696)
add 7d474c7 Update CHANGES.md for 1.11.0rc7
add 18519eb [maven-release-plugin] prepare release apache-parquet-1.11.0-rc7
add 7bd38d1 [maven-release-plugin] prepare for next development iteration
add 475b446 Prepare for next development iteration
add 4ca29c7 [PARQUET-1717] Convert i16 thrift to INT16 logical type instead (#706)
add 2c9ccf9 PARQUET-1696: Remove unused hadoop-1 profile (#701)
add 3b4ecf2 PARQUET-1723: Read From Maps without using .contains(...) (#711)
add b9f16e5 PARQUET-1724: Use ConcurrentHashMap for Cache in DictionaryPageReader (#712)
add 1e15f60 PARQUET-1726: Use Java 8 Multi Exception Handling (#714)
add 3d8ce06 PARQUET-1727: Do Not Swallow InterruptedException in ParquetLoader (#715)
add cce6fdb PARQUET-1732: Call toArray With Empty Array (#720)
add a7447f6 PARQUET-1731: Use JDK 8 Facilities to Simplify FilteringRecordMaterializer (#719)
add c697d80 PARQUET-1730: Use switch Statement in AvroIndexedRecordConverter for Enums (#718)
add ce55c9b PARQUET-1703: Update API compatibility check (#709)
add e430527 PARQUET-1741: Restore APIs to keep backward compatibility (#729)
add ac7840c PARQUET-1725: Replace Usage of Strings.join with JDK Functionality in… (#713)
add 1e44aa4 Merge remote-tracking branch 'apache/master' into bloom-filter
No new revisions were added by this update.
Summary of changes:
.travis.yml | 5 +-
CHANGES.md | 64 +-
README.md | 2 +-
.../run_checksums.sh => dev/finalize-release | 32 +-
dev/prepare-release.sh | 15 +-
dev/source-release.sh | 4 +-
dev/travis-before_install.sh | 8 +-
.../org/apache/parquet/avro/AvroConverters.java | 9 +-
.../parquet/avro/AvroIndexedRecordConverter.java | 49 +-
.../apache/parquet/avro/AvroRecordConverter.java | 4 +-
.../parquet/avro/TestArrayCompatibility.java | 1048 ++++++++++----------
.../parquet/avro/TestAvroSchemaConverter.java | 35 +-
parquet-benchmarks/README.md | 36 +-
parquet-benchmarks/pom.xml | 5 -
parquet-benchmarks/run.sh | 96 +-
.../apache/parquet/benchmarks/BenchmarkFiles.java | 2 +
.../apache/parquet/benchmarks/DataGenerator.java | 9 +-
.../benchmarks/PageChecksumDataGenerator.java | 23 +-
.../benchmarks/PageChecksumReadBenchmarks.java | 63 +-
.../benchmarks/PageChecksumWriteBenchmarks.java | 56 +-
.../apache/parquet/benchmarks/ReadBenchmarks.java | 25 +
.../apache/parquet/benchmarks/WriteBenchmarks.java | 11 +-
.../src/main}/resources/log4j.properties | 6 +-
parquet-cli/pom.xml | 12 +-
.../java/org/apache/parquet/cli/BaseCommand.java | 4 +-
.../apache/parquet/cli/commands/ToAvroCommand.java | 22 +-
.../org/apache/parquet/cli/csv/RecordBuilder.java | 2 +-
.../java/org/apache/parquet/cli/json/AvroJson.java | 12 +-
.../java/org/apache/parquet/cli/util/Codecs.java | 2 +
.../apache/parquet/cli/util/GetClassLoader.java | 2 +-
.../apache/parquet/cli/commands/AvroFileTest.java | 5 +
.../parquet/cli/commands/ToAvroCommandTest.java | 63 +-
parquet-column/pom.xml | 22 -
.../org/apache/parquet/CorruptDeltaByteArrays.java | 5 +-
.../java/org/apache/parquet/CorruptStatistics.java | 12 +-
.../org/apache/parquet/column/EncodingStats.java | 14 +-
.../apache/parquet/column/ParquetProperties.java | 37 +-
.../parquet/column/impl/ColumnReadStoreImpl.java | 4 +-
.../column/statistics/BinaryStatistics.java | 11 +
.../values/plain/BinaryPlainValuesReader.java | 8 +-
.../parquet/filter2/predicate/Operators.java | 4 +-
.../recordlevel/FilteringGroupConverter.java | 7 +-
.../recordlevel/FilteringRecordMaterializer.java | 48 +-
.../columnindex/BinaryColumnIndexBuilder.java | 4 +-
.../column/columnindex/BinaryTruncator.java | 16 +-
.../org/apache/parquet/io/PrimitiveColumnIO.java | 2 +-
.../parquet/io/RecordReaderImplementation.java | 2 +-
.../java/org/apache/parquet/io/api/Binary.java | 7 +-
.../java/org/apache/parquet/schema/GroupType.java | 5 +-
.../parquet/schema/LogicalTypeAnnotation.java | 6 -
.../main/java/org/apache/parquet/schema/Types.java | 6 +-
.../apache/parquet/schema/TestTypeBuilders.java | 245 ++---
.../org/apache/parquet/schema/TestTypeUtil.java | 39 +-
parquet-common/pom.xml | 2 +-
.../src/main/java/org/apache/parquet/Strings.java | 6 +
.../java/org/apache/parquet/VersionParser.java | 5 +-
.../apache/parquet/hadoop/metadata/ColumnPath.java | 4 +-
.../org/apache/parquet/util/DynConstructors.java | 22 +-
.../java/org/apache/parquet/util/DynMethods.java | 6 +-
.../parquet/bytes/TestByteBufferInputStreams.java | 50 +-
.../io/TestDelegatingSeekableInputStream.java | 43 +-
.../apache/parquet/util/TestDynConstructors.java | 102 +-
.../org/apache/parquet/util/TestDynMethods.java | 137 +--
.../parquet/column/values/bitpacking/Packer.java | 11 +-
parquet-format-structures/pom.xml | 8 +-
.../org/apache/parquet/format/event/Consumers.java | 4 +-
parquet-hadoop/pom.xml | 2 +-
.../org/apache/parquet/ParquetReadOptions.java | 9 +-
.../format/converter/ParquetMetadataConverter.java | 72 +-
.../parquet/hadoop/ColumnChunkPageReadStore.java | 17 +-
.../parquet/hadoop/ColumnIndexValidator.java | 613 ++++++++++++
.../parquet/hadoop/DictionaryPageReader.java | 70 +-
.../apache/parquet/hadoop/DirectCodecFactory.java | 8 +-
.../org/apache/parquet/hadoop/MemoryManager.java | 4 +-
.../apache/parquet/hadoop/ParquetFileReader.java | 67 +-
.../apache/parquet/hadoop/ParquetFileWriter.java | 24 +-
.../apache/parquet/hadoop/ParquetInputFormat.java | 11 +-
.../apache/parquet/hadoop/ParquetOutputFormat.java | 29 +-
.../org/apache/parquet/hadoop/ParquetWriter.java | 3 +-
.../org/apache/parquet/hadoop/PrintFooter.java | 14 +-
.../org/apache/parquet/hadoop/codec/CleanUtil.java | 1 -
.../parquet/hadoop/metadata/ParquetMetadata.java | 8 -
.../apache/parquet/hadoop/util/ContextUtil.java | 32 +-
.../apache/parquet/hadoop/util/HadoopStreams.java | 13 +-
.../converter/TestParquetMetadataConverter.java | 74 +-
.../apache/parquet/hadoop/TestMemoryManager.java | 7 +-
.../apache/parquet/hadoop/TestParquetWriter.java | 17 +-
.../hadoop/TestParquetWriterAppendBlocks.java | 22 +-
.../hadoop/example/TestInputOutputFormat.java | 3 +-
.../hadoop/util/TestHadoop2ByteBufferReads.java | 18 +-
.../apache/parquet/statistics/RandomValues.java | 46 +-
.../parquet/statistics/TestColumnIndexes.java | 300 ++++++
parquet-jackson/pom.xml | 6 +-
parquet-pig/pom.xml | 2 +-
.../java/org/apache/parquet/pig/ParquetLoader.java | 5 +-
.../org/apache/parquet/pig/TupleWriteSupport.java | 4 +-
.../apache/parquet/pig/convert/TupleConverter.java | 7 +-
.../apache/parquet/pig/summary/SummaryData.java | 8 -
parquet-protobuf/pom.xml | 7 +
.../apache/parquet/proto/ProtoSchemaConverter.java | 2 +-
parquet-scrooge/pom.xml | 22 +-
parquet-scrooge/src/test/thrift/test.thrift | 14 +-
parquet-thrift/pom.xml | 9 +-
.../hadoop/thrift/ThriftBytesWriteSupport.java | 4 +-
.../parquet/hadoop/thrift/ThriftReadSupport.java | 8 +-
.../thrift/BufferedProtocolReadToWrite.java | 4 +-
.../apache/parquet/thrift/ParquetReadProtocol.java | 4 +-
.../parquet/thrift/ThriftSchemaConvertVisitor.java | 2 +-
.../thrift/struct/CompatibilityChecker.java | 4 +-
.../parquet/thrift/TestThriftRecordConverter.java | 5 +-
.../parquet/thrift/TestThriftSchemaConverter.java | 13 +
parquet-thrift/src/test/thrift/test.thrift | 6 +-
parquet-tools/README.md | 4 +-
parquet-tools/pom.xml | 1 -
.../parquet/tools/command/MetadataUtils.java | 2 +-
.../apache/parquet/tools/util/MetadataUtils.java | 2 +-
pom.xml | 151 ++-
117 files changed, 2642 insertions(+), 1773 deletions(-)
rename parquet-benchmarks/run_checksums.sh => dev/finalize-release (50%)
mode change 100644 => 100755 dev/source-release.sh
mode change 100644 => 100755 dev/travis-before_install.sh
copy {parquet-hadoop/src/test => parquet-benchmarks/src/main}/resources/log4j.properties (88%)
create mode 100644 parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexValidator.java
create mode 100644 parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestColumnIndexes.java