You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/10/29 00:30:49 UTC
[GitHub] [pinot] richardstartin opened a new pull request #7661: implement balanced V4 raw chunk format
richardstartin opened a new pull request #7661:
URL: https://github.com/apache/pinot/pull/7661
## Description
*DO NOT REVIEW*
## Upgrade Notes
Does this PR prevent a zero down-time upgrade? (Assume upgrade order: Controller, Broker, Server, Minion)
* [ ] Yes (Please label as **<code>backward-incompat</code>**, and complete the section below on Release Notes)
Does this PR fix a zero-downtime upgrade introduced earlier?
* [ ] Yes (Please label this as **<code>backward-incompat</code>**, and complete the section below on Release Notes)
Does this PR otherwise need attention when creating release notes? Things to consider:
- New configuration options
- Deprecation of configurations
- Signature changes to public methods/interfaces
- New plugins added or old plugins removed
* [ ] Yes (Please label this PR as **<code>release-notes</code>** and complete the section on Release Notes)
## Release Notes
<!-- If you have tagged this as either backward-incompat or release-notes,
you MUST add text here that you would like to see appear in release notes of the
next release. -->
<!-- If you have a series of commits adding or enabling a feature, then
add this section only in final commit that marks the feature completed.
Refer to earlier release notes to see examples of text.
-->
## Documentation
<!-- If you have introduced a new feature or configuration, please add it to the documentation as well.
See https://docs.pinot.apache.org/developers/developers-and-contributors/update-document
-->
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741359290
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
Well, it can’t be less than but I see your point. I will include some ascii art to document the layout.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (47a2cf3) into [master](https://codecov.io/gh/apache/pinot/commit/e80198db7f71f6c043e4ffc728d6cfce6eb9b729?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (e80198d) will **decrease** coverage by `56.97%`.
> The diff coverage is `0.00%`.
> :exclamation: Current head 47a2cf3 differs from pull request most recent head a702755. Consider uploading reports for the commit a702755 to get more accurate results
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
=============================================
- Coverage 71.50% 14.52% -56.98%
+ Complexity 4030 80 -3950
=============================================
Files 1581 1537 -44
Lines 80400 78778 -1622
Branches 11943 11768 -175
=============================================
- Hits 57486 11441 -46045
- Misses 19026 66500 +47474
+ Partials 3888 837 -3051
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `?` | |
| unittests1 | `?` | |
| unittests2 | `14.52% <0.00%> (-0.02%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../pinot/common/utils/ClientSSLContextGenerator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdXRpbHMvQ2xpZW50U1NMQ29udGV4dEdlbmVyYXRvci5qYXZh) | `0.00% <0.00%> (-31.04%)` | :arrow_down: |
| [...e/pinot/common/utils/FileUploadDownloadClient.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdXRpbHMvRmlsZVVwbG9hZERvd25sb2FkQ2xpZW50LmphdmE=) | `0.00% <0.00%> (-66.95%)` | :arrow_down: |
| [.../core/operator/combine/GroupByCombineOperator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9vcGVyYXRvci9jb21iaW5lL0dyb3VwQnlDb21iaW5lT3BlcmF0b3IuamF2YQ==) | `0.00% <0.00%> (-79.37%)` | :arrow_down: |
| [...reaming/StreamingSelectionOnlyCombineOperator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9vcGVyYXRvci9zdHJlYW1pbmcvU3RyZWFtaW5nU2VsZWN0aW9uT25seUNvbWJpbmVPcGVyYXRvci5qYXZh) | `0.00% <0.00%> (-71.12%)` | :arrow_down: |
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `0.00% <ø> (-100.00%)` | :arrow_down: |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `0.00% <0.00%> (-93.03%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `0.00% <0.00%> (-84.22%)` | :arrow_down: |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `0.00% <0.00%> (-81.76%)` | :arrow_down: |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `0.00% <0.00%> (-92.65%)` | :arrow_down: |
| ... and [1264 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [e80198d...a702755](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] kkrugler commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
kkrugler commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960118748
Hi @richardstartin - hard to say whether we'd go for fewer/bigger partitions now or not, as we've tuned the Flink workflows and our Pinot cluster for the current configuration (34 segments/month, and 60 months). And I wish I had a better grasp of Pinot metrics to determine whether our current segment sizes were reasonably optimal or not - it feels like a variant of the Goldilocks problem...not too big, and not too small.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743434145
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
No, this would result in an off by one (or off by 4 really) bug because of the offset for the number of docs at the start of the buffer. In fact, try it yourself and run the tests.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
Review comment:
Yes, good point.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while compressing/writing data chunk", e);
+ throw new RuntimeException(e);
+ } finally {
+ CleanerUtil.cleanQuietly(target);
+ }
+ }
+
+ private void clearChunkBuffer() {
+ _chunkBuffer.clear();
+ _chunkBuffer.position(Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ flushChunk();
+ // write out where the chunks start into slot reserved at offset 12
+ _output.seek(3 * Integer.BYTES);
+ _output.writeInt(_metadataSize);
+ _output.seek(_metadataSize);
+ _dataChannel.truncate(_chunkOffset);
+ _output.setLength(_metadataSize + _chunkOffset);
+ long total = _chunkOffset;
+ long position = 0;
+ while (total > 0) {
+ long transferred = _dataChannel.transferTo(position, total, _output.getChannel());
+ total -= transferred;
+ position += transferred;
+ }
+ _dataChannel.close();
+ _output.close();
+ FileUtils.deleteQuietly(_dataBuffer);
Review comment:
Good catch.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
Review comment:
I agree
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
Well, this was compensating for another bug, it's fixed now.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkWriter.java
##########
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.Closeable;
+
+
+public interface VarByteChunkWriter extends Closeable {
+ void putString(String value);
Review comment:
I inherited this behaviour from the current variable length writer so I could extract an interface without changing its behaviour.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
Review comment:
See above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (5848057) into [master](https://codecov.io/gh/apache/pinot/commit/e80198db7f71f6c043e4ffc728d6cfce6eb9b729?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (e80198d) will **increase** coverage by `0.03%`.
> The diff coverage is `91.20%`.
> :exclamation: Current head 5848057 differs from pull request most recent head 95ad66b. Consider uploading reports for the commit 95ad66b to get more accurate results
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
============================================
+ Coverage 71.50% 71.53% +0.03%
- Complexity 4030 4058 +28
============================================
Files 1581 1583 +2
Lines 80400 80645 +245
Branches 11943 11968 +25
============================================
+ Hits 57486 57689 +203
- Misses 19026 19065 +39
- Partials 3888 3891 +3
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `28.90% <0.00%> (-0.17%)` | :arrow_down: |
| integration2 | `27.57% <0.00%> (-0.02%)` | :arrow_down: |
| unittests1 | `68.69% <91.20%> (+0.09%)` | :arrow_up: |
| unittests2 | `14.50% <0.00%> (-0.04%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `100.00% <ø> (ø)` | |
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `91.11% <66.66%> (-1.92%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `80.95% <66.66%> (-3.26%)` | :arrow_down: |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `80.85% <66.66%> (-0.91%)` | :arrow_down: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `92.10% <92.10%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `96.46% <96.46%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...nction/DistinctCountBitmapAggregationFunction.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9xdWVyeS9hZ2dyZWdhdGlvbi9mdW5jdGlvbi9EaXN0aW5jdENvdW50Qml0bWFwQWdncmVnYXRpb25GdW5jdGlvbi5qYXZh) | `41.45% <0.00%> (-10.37%)` | :arrow_down: |
| ... and [19 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [e80198d...95ad66b](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] kkrugler commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
kkrugler commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960291492
@richardstartin - yes, 4GB should be fine. The point I should have made better is that general rules of thumb about sizing Pinot segments are mostly based on # of queryable columns per segment (and thus the impact on query performance). But in our situation it was just a chunk of non-queryable text data.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter commented on pull request #7661: implement balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (318c9d7) into [master](https://codecov.io/gh/apache/pinot/commit/db11e0fda8f27651b0b1fc30d0aea6a25bf7eccb?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (db11e0f) will **decrease** coverage by `57.08%`.
> The diff coverage is `0.00%`.
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
=============================================
- Coverage 71.58% 14.50% -57.09%
+ Complexity 4014 80 -3934
=============================================
Files 1578 1534 -44
Lines 80074 78544 -1530
Branches 11868 11720 -148
=============================================
- Hits 57323 11391 -45932
- Misses 18887 66327 +47440
+ Partials 3864 826 -3038
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `?` | |
| unittests1 | `?` | |
| unittests2 | `14.50% <0.00%> (-0.07%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `0.00% <0.00%> (-92.65%)` | :arrow_down: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `0.00% <0.00%> (-35.09%)` | :arrow_down: |
| [...c/main/java/org/apache/pinot/common/tier/Tier.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdGllci9UaWVyLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...ain/java/org/apache/pinot/core/data/table/Key.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9kYXRhL3RhYmxlL0tleS5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../java/org/apache/pinot/spi/utils/BooleanUtils.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc3BpL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9zcGkvdXRpbHMvQm9vbGVhblV0aWxzLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../java/org/apache/pinot/core/data/table/Record.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9kYXRhL3RhYmxlL1JlY29yZC5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../java/org/apache/pinot/core/util/GroupByUtils.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS91dGlsL0dyb3VwQnlVdGlscy5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| ... and [1253 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [db11e0f...318c9d7](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r740177595
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
Indeed, actually, the check doesn't need to go here, but before the next attempt to store a value, because we can actually store slightly more than 4GB in compressed data, we just can't store an offset after 4GB has been breached.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741359290
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
Well, it can’t be less than but I see your point. I will include some ascii art to document the layout.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743358730
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
Review comment:
I think this should be `>`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
Review comment:
Should this also be `>`?
Suggest adding some comments on why do we need to reserve 4 extra bytes (I assume it is for the `numValuesInChunk` header)
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
Review comment:
(minor) Suggest adding parentheses for readability
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
Should this be:
```suggestion
source.position(copyFrom).limit(copyFrom + valueLengths[i]);
```
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while compressing/writing data chunk", e);
+ throw new RuntimeException(e);
+ } finally {
+ CleanerUtil.cleanQuietly(target);
+ }
+ }
+
+ private void clearChunkBuffer() {
+ _chunkBuffer.clear();
+ _chunkBuffer.position(Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ flushChunk();
+ // write out where the chunks start into slot reserved at offset 12
+ _output.seek(3 * Integer.BYTES);
+ _output.writeInt(_metadataSize);
+ _output.seek(_metadataSize);
+ _dataChannel.truncate(_chunkOffset);
+ _output.setLength(_metadataSize + _chunkOffset);
+ long total = _chunkOffset;
+ long position = 0;
+ while (total > 0) {
+ long transferred = _dataChannel.transferTo(position, total, _output.getChannel());
+ total -= transferred;
+ position += transferred;
+ }
+ _dataChannel.close();
+ _output.close();
+ FileUtils.deleteQuietly(_dataBuffer);
Review comment:
Should also release the `_chunkBuffer`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
Review comment:
For a write method, might be better to throw the `IOException` and expect the caller to handle it
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
Review comment:
Why do we need to allocate a separate direct buffer for these 2 compression types? Some comments would be good
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkWriter.java
##########
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.Closeable;
+
+
+public interface VarByteChunkWriter extends Closeable {
+ void putString(String value);
Review comment:
As a writer interface, I feel it might be better to allow these 2 methods to throw `IOException`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
Review comment:
The offsets can be updated along with the value copy, since we need to position the `_chunkBuffer` at the final offset
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741910523
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R96
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741911293
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R140-R144
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] kkrugler commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
kkrugler commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960118748
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] klsince commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
klsince commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r742140045
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a-bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a-bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
Review comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960155321
@kkrugler What I'm getting at is I suspect the limit pushed you in the right direction, and a 4GB limit should be comfortable, especially if values are packed in to chunks effectively.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r740177595
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
Indeed, actually, the check doesn't need to go here, but before the next attempt to store a value, because we can actually store slightly more than 4GB of compressed data, we just can't store an offset after 4GB has been breached.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (95aa430) into [master](https://codecov.io/gh/apache/pinot/commit/a875c99df978ace644863d8865306989089d4542?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (a875c99) will **decrease** coverage by `2.90%`.
> The diff coverage is `92.76%`.
> :exclamation: Current head 95aa430 differs from pull request most recent head 0a86401. Consider uploading reports for the commit 0a86401 to get more accurate results
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
============================================
- Coverage 71.60% 68.70% -2.91%
+ Complexity 4022 3966 -56
============================================
Files 1578 1185 -393
Lines 80207 58045 -22162
Branches 11904 8905 -2999
============================================
- Hits 57436 39880 -17556
+ Misses 18902 15347 -3555
+ Partials 3869 2818 -1051
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `?` | |
| unittests1 | `68.70% <92.76%> (+0.08%)` | :arrow_up: |
| unittests2 | `?` | |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `92.10% <92.10%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `97.27% <97.27%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...a/org/apache/pinot/common/metrics/MinionMeter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9NaW5pb25NZXRlci5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...g/apache/pinot/common/metrics/ControllerMeter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9Db250cm9sbGVyTWV0ZXIuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../apache/pinot/common/metrics/BrokerQueryPhase.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9Ccm9rZXJRdWVyeVBoYXNlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../apache/pinot/common/metrics/MinionQueryPhase.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9NaW5pb25RdWVyeVBoYXNlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...he/pinot/common/messages/SegmentReloadMessage.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWVzc2FnZXMvU2VnbWVudFJlbG9hZE1lc3NhZ2UuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| ... and [602 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [a875c99...0a86401](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (bc891cb) into [master](https://codecov.io/gh/apache/pinot/commit/2075cbef39008e0faa3b4f78d9f0a2cb47b3deb0?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (2075cbe) will **decrease** coverage by `55.85%`.
> The diff coverage is `0.00%`.
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
=============================================
- Coverage 70.34% 14.48% -55.86%
+ Complexity 4035 80 -3955
=============================================
Files 1582 1538 -44
Lines 80553 78917 -1636
Branches 11967 11787 -180
=============================================
- Hits 56661 11429 -45232
- Misses 20007 66649 +46642
+ Partials 3885 839 -3046
```
| Flag | Coverage Δ | |
|---|---|---|
| integration2 | `?` | |
| unittests1 | `?` | |
| unittests2 | `14.48% <0.00%> (-0.05%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `0.00% <ø> (-100.00%)` | :arrow_down: |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `0.00% <0.00%> (-93.03%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `0.00% <0.00%> (-84.22%)` | :arrow_down: |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `0.00% <0.00%> (-81.76%)` | :arrow_down: |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `0.00% <0.00%> (-92.65%)` | :arrow_down: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `0.00% <0.00%> (-35.09%)` | :arrow_down: |
| [...c/main/java/org/apache/pinot/common/tier/Tier.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdGllci9UaWVyLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| ... and [1240 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [2075cbe...bc891cb](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-956541373
@kkrugler that's great feedback. Given that the constraint was lifted last year when version 3 of the format was implemented, it would be interesting to know whether you would keep the controls in place for your use case (because there are benefits in doing so), or where you would draw the line if adjusting the partitioning limits.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] klsince commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
klsince commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r742140045
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a-bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a-bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
Review comment:
👍
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a-bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a-bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
Review comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r746223372
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
Review comment:
Sounds good. Let's revisit if this shows up in the profiler
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang merged pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang merged pull request #7661:
URL: https://github.com/apache/pinot/pull/7661
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (47a2cf3) into [master](https://codecov.io/gh/apache/pinot/commit/e80198db7f71f6c043e4ffc728d6cfce6eb9b729?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (e80198d) will **decrease** coverage by `56.97%`.
> The diff coverage is `0.00%`.
> :exclamation: Current head 47a2cf3 differs from pull request most recent head a702755. Consider uploading reports for the commit a702755 to get more accurate results
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
=============================================
- Coverage 71.50% 14.52% -56.98%
+ Complexity 4030 80 -3950
=============================================
Files 1581 1537 -44
Lines 80400 78778 -1622
Branches 11943 11768 -175
=============================================
- Hits 57486 11441 -46045
- Misses 19026 66500 +47474
+ Partials 3888 837 -3051
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `?` | |
| unittests1 | `?` | |
| unittests2 | `14.52% <0.00%> (-0.02%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../pinot/common/utils/ClientSSLContextGenerator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdXRpbHMvQ2xpZW50U1NMQ29udGV4dEdlbmVyYXRvci5qYXZh) | `0.00% <0.00%> (-31.04%)` | :arrow_down: |
| [...e/pinot/common/utils/FileUploadDownloadClient.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdXRpbHMvRmlsZVVwbG9hZERvd25sb2FkQ2xpZW50LmphdmE=) | `0.00% <0.00%> (-66.95%)` | :arrow_down: |
| [.../core/operator/combine/GroupByCombineOperator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9vcGVyYXRvci9jb21iaW5lL0dyb3VwQnlDb21iaW5lT3BlcmF0b3IuamF2YQ==) | `0.00% <0.00%> (-79.37%)` | :arrow_down: |
| [...reaming/StreamingSelectionOnlyCombineOperator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9vcGVyYXRvci9zdHJlYW1pbmcvU3RyZWFtaW5nU2VsZWN0aW9uT25seUNvbWJpbmVPcGVyYXRvci5qYXZh) | `0.00% <0.00%> (-71.12%)` | :arrow_down: |
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `0.00% <ø> (-100.00%)` | :arrow_down: |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `0.00% <0.00%> (-93.03%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `0.00% <0.00%> (-84.22%)` | :arrow_down: |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `0.00% <0.00%> (-81.76%)` | :arrow_down: |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `0.00% <0.00%> (-92.65%)` | :arrow_down: |
| ... and [1264 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [e80198d...a702755](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (5848057) into [master](https://codecov.io/gh/apache/pinot/commit/e80198db7f71f6c043e4ffc728d6cfce6eb9b729?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (e80198d) will **decrease** coverage by `1.15%`.
> The diff coverage is `91.20%`.
> :exclamation: Current head 5848057 differs from pull request most recent head 95ad66b. Consider uploading reports for the commit 95ad66b to get more accurate results
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
============================================
- Coverage 71.50% 70.34% -1.16%
- Complexity 4030 4058 +28
============================================
Files 1581 1583 +2
Lines 80400 80645 +245
Branches 11943 11968 +25
============================================
- Hits 57486 56726 -760
- Misses 19026 20035 +1009
+ Partials 3888 3884 -4
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `27.57% <0.00%> (-0.02%)` | :arrow_down: |
| unittests1 | `68.69% <91.20%> (+0.09%)` | :arrow_up: |
| unittests2 | `14.50% <0.00%> (-0.04%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `100.00% <ø> (ø)` | |
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `91.11% <66.66%> (-1.92%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `80.95% <66.66%> (-3.26%)` | :arrow_down: |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `80.85% <66.66%> (-0.91%)` | :arrow_down: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `92.10% <92.10%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `96.46% <96.46%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...pinot/minion/exception/TaskCancelledException.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtbWluaW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9taW5pb24vZXhjZXB0aW9uL1Rhc2tDYW5jZWxsZWRFeGNlcHRpb24uamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| ... and [103 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [e80198d...95ad66b](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743434145
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
No, this would result in an off by one (or off by 4 really) bug because of the offset for the number of docs at the start of the buffer. In fact, try it yourself and run the tests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960155321
@kkrugler What I'm getting at is I suspect the limit pushed you in the right direction, and a 4GB limit should be comfortable, especially if values are packed in to chunks effectively.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743358730
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
Review comment:
I think this should be `>`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
Review comment:
Should this also be `>`?
Suggest adding some comments on why do we need to reserve 4 extra bytes (I assume it is for the `numValuesInChunk` header)
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
Review comment:
(minor) Suggest adding parentheses for readability
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
Should this be:
```suggestion
source.position(copyFrom).limit(copyFrom + valueLengths[i]);
```
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while compressing/writing data chunk", e);
+ throw new RuntimeException(e);
+ } finally {
+ CleanerUtil.cleanQuietly(target);
+ }
+ }
+
+ private void clearChunkBuffer() {
+ _chunkBuffer.clear();
+ _chunkBuffer.position(Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ flushChunk();
+ // write out where the chunks start into slot reserved at offset 12
+ _output.seek(3 * Integer.BYTES);
+ _output.writeInt(_metadataSize);
+ _output.seek(_metadataSize);
+ _dataChannel.truncate(_chunkOffset);
+ _output.setLength(_metadataSize + _chunkOffset);
+ long total = _chunkOffset;
+ long position = 0;
+ while (total > 0) {
+ long transferred = _dataChannel.transferTo(position, total, _output.getChannel());
+ total -= transferred;
+ position += transferred;
+ }
+ _dataChannel.close();
+ _output.close();
+ FileUtils.deleteQuietly(_dataBuffer);
Review comment:
Should also release the `_chunkBuffer`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
Review comment:
For a write method, might be better to throw the `IOException` and expect the caller to handle it
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
Review comment:
Why do we need to allocate a separate direct buffer for these 2 compression types? Some comments would be good
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkWriter.java
##########
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.Closeable;
+
+
+public interface VarByteChunkWriter extends Closeable {
+ void putString(String value);
Review comment:
As a writer interface, I feel it might be better to allow these 2 methods to throw `IOException`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
Review comment:
The offsets can be updated along with the value copy, since we need to position the `_chunkBuffer` at the final offset
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (318c9d7) into [master](https://codecov.io/gh/apache/pinot/commit/db11e0fda8f27651b0b1fc30d0aea6a25bf7eccb?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (db11e0f) will **decrease** coverage by `6.34%`.
> The diff coverage is `90.95%`.
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
============================================
- Coverage 71.58% 65.24% -6.35%
- Complexity 4014 4045 +31
============================================
Files 1578 1534 -44
Lines 80074 78544 -1530
Branches 11868 11720 -148
============================================
- Hits 57323 51243 -6080
- Misses 18887 23656 +4769
+ Partials 3864 3645 -219
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `?` | |
| unittests1 | `68.69% <90.95%> (+<0.01%)` | :arrow_up: |
| unittests2 | `14.50% <0.00%> (-0.07%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `91.58% <91.58%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `95.06% <95.06%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...a/org/apache/pinot/common/metrics/MinionMeter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9NaW5pb25NZXRlci5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...g/apache/pinot/common/metrics/ControllerMeter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9Db250cm9sbGVyTWV0ZXIuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../apache/pinot/common/metrics/BrokerQueryPhase.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9Ccm9rZXJRdWVyeVBoYXNlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [.../apache/pinot/common/metrics/MinionQueryPhase.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9NaW5pb25RdWVyeVBoYXNlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...he/pinot/common/messages/SegmentReloadMessage.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWVzc2FnZXMvU2VnbWVudFJlbG9hZE1lc3NhZ2UuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| ... and [359 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [db11e0f...318c9d7](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] klsince commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
klsince commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741338323
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
Review comment:
nit: add a comment that the two metadata entries are the starting docId of the chunk and the chunk offset.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
It may help to understand this method to describe a bit about the data layout before and after in method comment.
nit: save a few indents
```
if (_nextDocId <= _docIdOffset) {
return;
}
...
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (453d1ea) into [master](https://codecov.io/gh/apache/pinot/commit/a875c99df978ace644863d8865306989089d4542?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (a875c99) will **increase** coverage by `0.00%`.
> The diff coverage is `90.68%`.
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
==========================================
Coverage 71.60% 71.61%
- Complexity 4022 4046 +24
==========================================
Files 1578 1580 +2
Lines 80207 80449 +242
Branches 11904 11929 +25
==========================================
+ Hits 57436 57614 +178
- Misses 18902 18954 +52
- Partials 3869 3881 +12
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `28.96% <0.00%> (-0.24%)` | :arrow_down: |
| integration2 | `27.64% <0.00%> (-0.18%)` | :arrow_down: |
| unittests1 | `68.70% <90.68%> (+0.08%)` | :arrow_up: |
| unittests2 | `14.49% <0.00%> (-0.05%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `100.00% <ø> (ø)` | |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `79.43% <33.33%> (-0.86%)` | :arrow_down: |
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `91.11% <66.66%> (-1.92%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `80.95% <66.66%> (-3.26%)` | :arrow_down: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `92.10% <92.10%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `97.27% <97.27%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...data/manager/realtime/SegmentCommitterFactory.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9kYXRhL21hbmFnZXIvcmVhbHRpbWUvU2VnbWVudENvbW1pdHRlckZhY3RvcnkuamF2YQ==) | `64.70% <0.00%> (-29.42%)` | :arrow_down: |
| ... and [25 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [a875c99...453d1ea](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] kkrugler commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
kkrugler commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-956505930
Hi @richardstartin - Just FYI, I agree that 4GB of compressed data is large. But we hit the 2GB limit, due to storing crawled web pages (many of which were > 100K/page) in a column, when we had more than 100K rows (IIRC) in our per-month segment. We wound up sub-partitioning each month, which was fine, but added complexity to our ETL workflow.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r745501666
##########
File path: pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/VarByteChunkV4Test.java
##########
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.creator;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.BiConsumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.local.segment.index.readers.forward.VarByteChunkSVForwardIndexReaderV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class VarByteChunkV4Test {
+
+ private static final String DIR_NAME = System.getProperty("java.io.tmpdir") + File.separator
+ + "VarByteChunkV4Test";
Review comment:
Funnily enough I copied and pasted this from another test. I can change it if it makes you happy :)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r745500379
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
Review comment:
A much more effective optimisation would be not to use strings at all, because even if we pool the `byte[]`, it gets copied immediately. There's also the question of the size of the buffer... I think Pinot suffers from buffer bloat right now, so I'm very reluctant to add another one. We can add one if we see this show up in profiles later. What I *don't* want to do is create a microbenchmark which inevitably will show reduced allocations, and add the buffer on the basis that allocations are reduced, without considering whether the allocation pressure is detrimental to QPS or macroscopic health of the server.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-956541373
@kkrugler that's great feedback. Given that the constraint was lifted last year when version 3 of the format was implemented, it would be interesting to know whether you would keep the controls in place for your use case (because there are benefits in doing so), or where you would draw the line if adjusting the partitioning limits.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (318c9d7) into [master](https://codecov.io/gh/apache/pinot/commit/db11e0fda8f27651b0b1fc30d0aea6a25bf7eccb?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (db11e0f) will **increase** coverage by `0.08%`.
> The diff coverage is `90.95%`.
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
============================================
+ Coverage 71.58% 71.67% +0.08%
- Complexity 4014 4045 +31
============================================
Files 1578 1580 +2
Lines 80074 80405 +331
Branches 11868 11922 +54
============================================
+ Hits 57323 57631 +308
- Misses 18887 18893 +6
- Partials 3864 3881 +17
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `29.29% <0.00%> (+0.23%)` | :arrow_up: |
| integration2 | `27.69% <0.00%> (-0.04%)` | :arrow_down: |
| unittests1 | `68.69% <90.95%> (+<0.01%)` | :arrow_up: |
| unittests2 | `14.50% <0.00%> (-0.07%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `91.58% <91.58%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `95.06% <95.06%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...ot/core/query/pruner/ColumnValueSegmentPruner.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9xdWVyeS9wcnVuZXIvQ29sdW1uVmFsdWVTZWdtZW50UHJ1bmVyLmphdmE=) | `76.08% <0.00%> (-18.08%)` | :arrow_down: |
| [...unction/DistinctCountHLLMVAggregationFunction.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9xdWVyeS9hZ2dyZWdhdGlvbi9mdW5jdGlvbi9EaXN0aW5jdENvdW50SExMTVZBZ2dyZWdhdGlvbkZ1bmN0aW9uLmphdmE=) | `17.16% <0.00%> (-10.91%)` | :arrow_down: |
| [...rg/apache/pinot/broker/routing/RoutingManager.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtYnJva2VyL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9icm9rZXIvcm91dGluZy9Sb3V0aW5nTWFuYWdlci5qYXZh) | `87.09% <0.00%> (-3.23%)` | :arrow_down: |
| [.../java/org/apache/pinot/spi/data/TimeFieldSpec.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc3BpL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9zcGkvZGF0YS9UaW1lRmllbGRTcGVjLmphdmE=) | `88.63% <0.00%> (-2.28%)` | :arrow_down: |
| [.../function/DistinctCountHLLAggregationFunction.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9xdWVyeS9hZ2dyZWdhdGlvbi9mdW5jdGlvbi9EaXN0aW5jdENvdW50SExMQWdncmVnYXRpb25GdW5jdGlvbi5qYXZh) | `43.20% <0.00%> (-1.98%)` | :arrow_down: |
| ... and [31 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [db11e0f...318c9d7](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] kkrugler commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
kkrugler commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-956505930
Hi @richardstartin - Just FYI, I agree that 4GB of compressed data is large. But we hit the 2GB limit, due to storing crawled web pages (many of which were > 100K/page) in a column, when we had more than 100K rows (IIRC) in our per-month segment. We wound up sub-partitioning each month, which was fine, but added complexity to our ETL workflow.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743438263
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
Review comment:
Yes, good point.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741910523
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R96
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R140-R144
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R96
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R140-R144
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (318c9d7) into [master](https://codecov.io/gh/apache/pinot/commit/db11e0fda8f27651b0b1fc30d0aea6a25bf7eccb?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (db11e0f) will **decrease** coverage by `1.16%`.
> The diff coverage is `90.95%`.
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
============================================
- Coverage 71.58% 70.42% -1.17%
- Complexity 4014 4045 +31
============================================
Files 1578 1580 +2
Lines 80074 80405 +331
Branches 11868 11922 +54
============================================
- Hits 57323 56627 -696
- Misses 18887 19911 +1024
- Partials 3864 3867 +3
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `27.69% <0.00%> (-0.04%)` | :arrow_down: |
| unittests1 | `68.69% <90.95%> (+<0.01%)` | :arrow_up: |
| unittests2 | `14.50% <0.00%> (-0.07%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `80.00% <50.00%> (-20.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `36.50% <50.00%> (+1.42%)` | :arrow_up: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `91.58% <91.58%> (ø)` | |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `95.06% <95.06%> (ø)` | |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `92.64% <100.00%> (ø)` | |
| [...pinot/minion/exception/TaskCancelledException.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtbWluaW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9taW5pb24vZXhjZXB0aW9uL1Rhc2tDYW5jZWxsZWRFeGNlcHRpb24uamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...nverttorawindex/ConvertToRawIndexTaskExecutor.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtcGx1Z2lucy9waW5vdC1taW5pb24tdGFza3MvcGlub3QtbWluaW9uLWJ1aWx0aW4tdGFza3Mvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3BsdWdpbi9taW5pb24vdGFza3MvY29udmVydHRvcmF3aW5kZXgvQ29udmVydFRvUmF3SW5kZXhUYXNrRXhlY3V0b3IuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...e/pinot/common/minion/MergeRollupTaskMetadata.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWluaW9uL01lcmdlUm9sbHVwVGFza01ldGFkYXRhLmphdmE=) | `0.00% <0.00%> (-94.74%)` | :arrow_down: |
| [...plugin/segmentuploader/SegmentUploaderDefault.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtcGx1Z2lucy9waW5vdC1zZWdtZW50LXVwbG9hZGVyL3Bpbm90LXNlZ21lbnQtdXBsb2FkZXItZGVmYXVsdC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvcGx1Z2luL3NlZ21lbnR1cGxvYWRlci9TZWdtZW50VXBsb2FkZXJEZWZhdWx0LmphdmE=) | `0.00% <0.00%> (-87.10%)` | :arrow_down: |
| [.../transform/function/MapValueTransformFunction.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9vcGVyYXRvci90cmFuc2Zvcm0vZnVuY3Rpb24vTWFwVmFsdWVUcmFuc2Zvcm1GdW5jdGlvbi5qYXZh) | `0.00% <0.00%> (-85.30%)` | :arrow_down: |
| ... and [108 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [db11e0f...318c9d7](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r739593162
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
Here we should check if we already put larger than 4G data and throw exception
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r740177595
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
Indeed, actually, the check doesn't need to go here, but before the next attempt to store a value, because we can actually store slightly more than 4GB of compressed data, we just can't store an offset after 4GB has been breached.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] klsince commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
klsince commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741338323
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
Review comment:
nit: add a comment that the two metadata entries are the starting docId of the chunk and the chunk offset.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
It may help to understand this method to describe a bit about the data layout before and after in method comment.
nit: save a few indents
```
if (_nextDocId <= _docIdOffset) {
return;
}
...
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] klsince commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
klsince commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r742140045
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a-bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a-bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
Review comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-956196251
> Since the target chunk size is much larger than the header size, I think it should not add much overhead to store long offset and remove the 4G limit for single index. We can also include the uncompressed size in the header in case some compressor does not include the length info in the compressed data.
There's a couple of things here:
* **Compression metadata** - this was the purpose of #7655 - to ensure that all formats we use have the correct metadata (3/4 already did) and enforce an upgrade path for `LZ4` when using this chunk format. So there's no need for any per-chunk compression metadata, and it factors into the next point
* **Offset sizes** - to me, 4GB of compressed chunks feels like a lot. At a compression ratio of 2x, that's 8GB raw data in a single segment, at 10x (JSON can be amazingly repetitive) it's 40GB. I am aware that in the past 32 bit offsets were shown not to be enough for some use cases, but they were signed offsets so only permitted 2GB compressed data. I am unaware of any evidence that 4GB would not have been enough. Why do I care? It's not for the sake of storage overhead, but I want to keep the metadata as small as possible in memory for the sake of searching it quickly. Can we frame the discussion in terms of why a user would want/need more than 4GB compressed data for a raw column in a single segment?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] codecov-commenter edited a comment on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
codecov-commenter edited a comment on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-954696416
# [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
> Merging [#7661](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (5848057) into [master](https://codecov.io/gh/apache/pinot/commit/e80198db7f71f6c043e4ffc728d6cfce6eb9b729?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (e80198d) will **decrease** coverage by `56.99%`.
> The diff coverage is `0.00%`.
> :exclamation: Current head 5848057 differs from pull request most recent head 95ad66b. Consider uploading reports for the commit 95ad66b to get more accurate results
[![Impacted file tree graph](https://codecov.io/gh/apache/pinot/pull/7661/graphs/tree.svg?width=650&height=150&src=pr&token=4ibza2ugkz&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
```diff
@@ Coverage Diff @@
## master #7661 +/- ##
=============================================
- Coverage 71.50% 14.50% -57.00%
+ Complexity 4030 80 -3950
=============================================
Files 1581 1537 -44
Lines 80400 78771 -1629
Branches 11943 11765 -178
=============================================
- Hits 57486 11427 -46059
- Misses 19026 66500 +47474
+ Partials 3888 844 -3044
```
| Flag | Coverage Δ | |
|---|---|---|
| integration1 | `?` | |
| integration2 | `?` | |
| unittests1 | `?` | |
| unittests2 | `14.50% <0.00%> (-0.04%)` | :arrow_down: |
Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
| [Impacted Files](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
|---|---|---|
| [.../writer/impl/VarByteChunkSVForwardIndexWriter.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlci5qYXZh) | `0.00% <ø> (-100.00%)` | :arrow_down: |
| [...riter/impl/VarByteChunkSVForwardIndexWriterV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9pby93cml0ZXIvaW1wbC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFdyaXRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [...r/impl/fwd/MultiValueFixedByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvTXVsdGlWYWx1ZUZpeGVkQnl0ZVJhd0luZGV4Q3JlYXRvci5qYXZh) | `0.00% <0.00%> (-93.03%)` | :arrow_down: |
| [...or/impl/fwd/SingleValueVarByteRawIndexCreator.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2NyZWF0b3IvaW1wbC9md2QvU2luZ2xlVmFsdWVWYXJCeXRlUmF3SW5kZXhDcmVhdG9yLmphdmE=) | `0.00% <0.00%> (-84.22%)` | :arrow_down: |
| [...ent/index/column/PhysicalColumnIndexContainer.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L2NvbHVtbi9QaHlzaWNhbENvbHVtbkluZGV4Q29udGFpbmVyLmphdmE=) | `0.00% <0.00%> (-81.76%)` | :arrow_down: |
| [...readers/forward/BaseChunkSVForwardIndexReader.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9CYXNlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlci5qYXZh) | `0.00% <0.00%> (-92.65%)` | :arrow_down: |
| [...rs/forward/VarByteChunkSVForwardIndexReaderV4.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1sb2NhbC9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VnbWVudC9sb2NhbC9zZWdtZW50L2luZGV4L3JlYWRlcnMvZm9yd2FyZC9WYXJCeXRlQ2h1bmtTVkZvcndhcmRJbmRleFJlYWRlclY0LmphdmE=) | `0.00% <0.00%> (ø)` | |
| [.../segment/spi/compression/ChunkCompressionType.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL2NvbXByZXNzaW9uL0NodW5rQ29tcHJlc3Npb25UeXBlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| [...g/apache/pinot/segment/spi/memory/CleanerUtil.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3Qtc2VnbWVudC1zcGkvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL3Bpbm90L3NlZ21lbnQvc3BpL21lbW9yeS9DbGVhbmVyVXRpbC5qYXZh) | `0.00% <0.00%> (-35.09%)` | :arrow_down: |
| [...c/main/java/org/apache/pinot/common/tier/Tier.java](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vdGllci9UaWVyLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
| ... and [1259 more](https://codecov.io/gh/apache/pinot/pull/7661/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
------
[Continue to review full report at Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
> **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
> `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
> Powered by [Codecov](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=footer&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Last update [e80198d...95ad66b](https://codecov.io/gh/apache/pinot/pull/7661?src=pr&el=lastupdated&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743446397
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkWriter.java
##########
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.Closeable;
+
+
+public interface VarByteChunkWriter extends Closeable {
+ void putString(String value);
Review comment:
I inherited this behaviour from the current variable length writer so I could extract an interface without changing its behaviour.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
Review comment:
See above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r741910523
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
+ int numDocs = _nextDocId - _docIdOffset;
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ _chunkBuffer.putInt(0, numDocs);
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R96
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,222 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ writeChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void writeChunk() {
+ if (_nextDocId > _docIdOffset) {
Review comment:
See here: https://github.com/apache/pinot/pull/7661/files#diff-8ef4905133d398798c3bbacd29dfd80e0c687ec0b9f66352deeb4560be2e4365R140-R144
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960155321
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-956541373
@kkrugler that's great feedback. Given that the constraint was lifted last year when version 3 of the format was implemented, it would be interesting to know whether you would keep the controls in place for your use case (because there are benefits in doing so), or where you would draw the line if adjusting the partitioning limits.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] kkrugler commented on pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
kkrugler commented on pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#issuecomment-960118748
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743443181
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
Well, this was compensating for another bug, it's fixed now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743434145
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
No, this would result in an off by one (or off by 4 really) bug because of the offset for the number of docs at the start of the buffer. In fact, try it yourself and run the tests.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
Review comment:
Yes, good point.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while compressing/writing data chunk", e);
+ throw new RuntimeException(e);
+ } finally {
+ CleanerUtil.cleanQuietly(target);
+ }
+ }
+
+ private void clearChunkBuffer() {
+ _chunkBuffer.clear();
+ _chunkBuffer.position(Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ flushChunk();
+ // write out where the chunks start into slot reserved at offset 12
+ _output.seek(3 * Integer.BYTES);
+ _output.writeInt(_metadataSize);
+ _output.seek(_metadataSize);
+ _dataChannel.truncate(_chunkOffset);
+ _output.setLength(_metadataSize + _chunkOffset);
+ long total = _chunkOffset;
+ long position = 0;
+ while (total > 0) {
+ long transferred = _dataChannel.transferTo(position, total, _output.getChannel());
+ total -= transferred;
+ position += transferred;
+ }
+ _dataChannel.close();
+ _output.close();
+ FileUtils.deleteQuietly(_dataBuffer);
Review comment:
Good catch.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
Review comment:
I agree
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
Well, this was compensating for another bug, it's fixed now.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkWriter.java
##########
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.Closeable;
+
+
+public interface VarByteChunkWriter extends Closeable {
+ void putString(String value);
Review comment:
I inherited this behaviour from the current variable length writer so I could extract an interface without changing its behaviour.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
Review comment:
See above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r745184469
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
+ }
+
+ @Override
+ public byte[] getBytes(int docId, ReaderContext context) {
+ return context.getValue(docId);
+ }
+
+ @Nullable
+ @Override
+ public ReaderContext createContext() {
+ return _chunkCompressionType == ChunkCompressionType.PASS_THROUGH
+ ? new UncompressedReaderContext(_chunks, _metadata)
+ : new CompressedReaderContext(_metadata, _chunks, _chunkDecompressor, _chunkCompressionType,
+ _targetDecompressedChunkSize);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+
+ public static abstract class ReaderContext implements ForwardIndexReaderContext {
+
+ protected final PinotDataBuffer _chunks;
+ protected final PinotDataBuffer _metadata;
+ protected int _docIdOffset;
+ protected int _nextDocIdOffset;
+ protected boolean _regularChunk;
+ protected int _numDocsInCurrentChunk;
+
+ protected ReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ _chunks = chunks;
+ _metadata = metadata;
+ }
+
+ public byte[] getValue(int docId) {
+ if (docId >= _docIdOffset && docId < _nextDocIdOffset) {
+ return readSmallUncompressedValue(docId);
+ } else {
+ try {
+ return decompressAndRead(docId);
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while decompressing data chunk", e);
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ protected long chunkIndexFor(int docId) {
+ long low = 0;
+ long high = (_metadata.size() / METADATA_ENTRY_SIZE) - 1;
+ while (low <= high) {
+ long mid = (low + high) >>> 1;
+ long position = mid * METADATA_ENTRY_SIZE;
+ int midDocId = _metadata.getInt(position) & 0x7FFFFFFF;
+ if (midDocId < docId) {
+ low = mid + 1;
+ } else if (midDocId > docId) {
+ high = mid - 1;
+ } else {
+ return position;
+ }
+ }
+ return (low - 1) * METADATA_ENTRY_SIZE;
+ }
+
+ protected abstract byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException;
+
+ protected abstract byte[] readSmallUncompressedValue(int docId);
+
+ private byte[] decompressAndRead(int docId)
+ throws IOException {
+ long metadataEntry = chunkIndexFor(docId);
+ int info = _metadata.getInt(metadataEntry);
+ _docIdOffset = info & 0x7FFFFFFF;
+ _regularChunk = _docIdOffset == info;
+ long offset = _metadata.getInt(metadataEntry + Integer.BYTES) & 0xFFFFFFFFL;
+ long limit;
+ if (_metadata.size() - METADATA_ENTRY_SIZE > metadataEntry) {
+ _nextDocIdOffset = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE) & 0x7FFFFFFF;
+ limit = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE + Integer.BYTES) & 0xFFFFFFFFL;
+ } else {
+ _nextDocIdOffset = Integer.MAX_VALUE;
+ limit = _chunks.size();
+ }
+ return processChunkAndReadFirstValue(docId, offset, limit);
+ }
+ }
+
+ private static final class UncompressedReaderContext extends ReaderContext {
+
+ private ByteBuffer _chunk;
+
+ UncompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ super(chunks, metadata);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit) {
+ _chunk = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ if (!_regularChunk) {
+ return readHugeValue();
+ }
+ _numDocsInCurrentChunk = _chunk.getInt(0);
+ return readSmallUncompressedValue(docId);
+ }
+
+ private byte[] readHugeValue() {
+ byte[] value = new byte[_chunk.capacity()];
+ _chunk.get(value);
+ return value;
+ }
+
+ @Override
+ protected byte[] readSmallUncompressedValue(int docId) {
+ int index = docId - _docIdOffset;
+ int offset = _chunk.getInt((index + 1) * Integer.BYTES);
+ int nextOffset = index == _numDocsInCurrentChunk - 1
+ ? _chunk.limit()
+ : _chunk.getInt((index + 2) * Integer.BYTES);
+ ByteBuffer view = _chunk.duplicate();
+ view.position(offset);
+ view.order(ByteOrder.LITTLE_ENDIAN);
+ byte[] bytes = new byte[nextOffset - offset];
+ view.get(bytes);
+ return bytes;
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+ }
+
+ private static final class CompressedReaderContext extends ReaderContext {
+
+ private final ByteBuffer _decompressedBuffer;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ CompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks, ChunkDecompressor chunkDecompressor,
+ ChunkCompressionType chunkCompressionType, int targetChunkSize) {
+ super(metadata, chunks);
+ _chunkDecompressor = chunkDecompressor;
+ _chunkCompressionType = chunkCompressionType;
+ _decompressedBuffer = ByteBuffer.allocateDirect(targetChunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException {
+ _decompressedBuffer.clear();
+ ByteBuffer compressed = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ int decompressedLength = _chunkDecompressor.decompressedLength(compressed);
Review comment:
`decompressedLength` is only needed for huge values, suggest moving it after the if check (not sure if java compiler is able to skip this line for regular chunk)
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
+ }
+
+ @Override
+ public byte[] getBytes(int docId, ReaderContext context) {
+ return context.getValue(docId);
+ }
+
+ @Nullable
+ @Override
+ public ReaderContext createContext() {
+ return _chunkCompressionType == ChunkCompressionType.PASS_THROUGH
+ ? new UncompressedReaderContext(_chunks, _metadata)
+ : new CompressedReaderContext(_metadata, _chunks, _chunkDecompressor, _chunkCompressionType,
+ _targetDecompressedChunkSize);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+
+ public static abstract class ReaderContext implements ForwardIndexReaderContext {
+
+ protected final PinotDataBuffer _chunks;
+ protected final PinotDataBuffer _metadata;
+ protected int _docIdOffset;
+ protected int _nextDocIdOffset;
+ protected boolean _regularChunk;
+ protected int _numDocsInCurrentChunk;
+
+ protected ReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ _chunks = chunks;
+ _metadata = metadata;
+ }
+
+ public byte[] getValue(int docId) {
+ if (docId >= _docIdOffset && docId < _nextDocIdOffset) {
+ return readSmallUncompressedValue(docId);
+ } else {
+ try {
+ return decompressAndRead(docId);
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while decompressing data chunk", e);
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ protected long chunkIndexFor(int docId) {
+ long low = 0;
+ long high = (_metadata.size() / METADATA_ENTRY_SIZE) - 1;
+ while (low <= high) {
+ long mid = (low + high) >>> 1;
+ long position = mid * METADATA_ENTRY_SIZE;
+ int midDocId = _metadata.getInt(position) & 0x7FFFFFFF;
+ if (midDocId < docId) {
+ low = mid + 1;
+ } else if (midDocId > docId) {
+ high = mid - 1;
+ } else {
+ return position;
+ }
+ }
+ return (low - 1) * METADATA_ENTRY_SIZE;
+ }
+
+ protected abstract byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException;
+
+ protected abstract byte[] readSmallUncompressedValue(int docId);
+
+ private byte[] decompressAndRead(int docId)
+ throws IOException {
+ long metadataEntry = chunkIndexFor(docId);
+ int info = _metadata.getInt(metadataEntry);
+ _docIdOffset = info & 0x7FFFFFFF;
+ _regularChunk = _docIdOffset == info;
+ long offset = _metadata.getInt(metadataEntry + Integer.BYTES) & 0xFFFFFFFFL;
+ long limit;
+ if (_metadata.size() - METADATA_ENTRY_SIZE > metadataEntry) {
+ _nextDocIdOffset = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE) & 0x7FFFFFFF;
+ limit = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE + Integer.BYTES) & 0xFFFFFFFFL;
+ } else {
+ _nextDocIdOffset = Integer.MAX_VALUE;
+ limit = _chunks.size();
+ }
+ return processChunkAndReadFirstValue(docId, offset, limit);
+ }
+ }
+
+ private static final class UncompressedReaderContext extends ReaderContext {
+
+ private ByteBuffer _chunk;
+
+ UncompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ super(chunks, metadata);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit) {
+ _chunk = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ if (!_regularChunk) {
+ return readHugeValue();
+ }
+ _numDocsInCurrentChunk = _chunk.getInt(0);
+ return readSmallUncompressedValue(docId);
+ }
+
+ private byte[] readHugeValue() {
+ byte[] value = new byte[_chunk.capacity()];
+ _chunk.get(value);
+ return value;
+ }
+
+ @Override
+ protected byte[] readSmallUncompressedValue(int docId) {
+ int index = docId - _docIdOffset;
+ int offset = _chunk.getInt((index + 1) * Integer.BYTES);
+ int nextOffset = index == _numDocsInCurrentChunk - 1
+ ? _chunk.limit()
+ : _chunk.getInt((index + 2) * Integer.BYTES);
+ ByteBuffer view = _chunk.duplicate();
Review comment:
Since it is always single-threaded, we can directly read from the `_chunk` without duplicating it
##########
File path: pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/VarByteChunkV4Test.java
##########
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.creator;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.BiConsumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.local.segment.index.readers.forward.VarByteChunkSVForwardIndexReaderV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class VarByteChunkV4Test {
+
+ private static final String DIR_NAME = System.getProperty("java.io.tmpdir") + File.separator
+ + "VarByteChunkV4Test";
Review comment:
Consistent with other tests
```suggestion
private static final String DIR_NAME = new File(FileUtils.getTempDirectory(), "VarByteChunkV4Test");
```
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
Review comment:
When reading string values, we can reuse the bytes (see `VarByteChunkSVForwardIndexReader` for details).
There are pros and cons of reusing the bytes. I'm not sure how well the modern JVM handles allocation and collection of these short-lived objects. Suggest adding some comments if you intentionally not reuse the bytes.
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
+ }
+
+ @Override
+ public byte[] getBytes(int docId, ReaderContext context) {
+ return context.getValue(docId);
+ }
+
+ @Nullable
+ @Override
+ public ReaderContext createContext() {
+ return _chunkCompressionType == ChunkCompressionType.PASS_THROUGH
+ ? new UncompressedReaderContext(_chunks, _metadata)
+ : new CompressedReaderContext(_metadata, _chunks, _chunkDecompressor, _chunkCompressionType,
+ _targetDecompressedChunkSize);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+
+ public static abstract class ReaderContext implements ForwardIndexReaderContext {
+
+ protected final PinotDataBuffer _chunks;
+ protected final PinotDataBuffer _metadata;
+ protected int _docIdOffset;
+ protected int _nextDocIdOffset;
+ protected boolean _regularChunk;
+ protected int _numDocsInCurrentChunk;
+
+ protected ReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ _chunks = chunks;
+ _metadata = metadata;
+ }
+
+ public byte[] getValue(int docId) {
+ if (docId >= _docIdOffset && docId < _nextDocIdOffset) {
+ return readSmallUncompressedValue(docId);
+ } else {
+ try {
+ return decompressAndRead(docId);
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while decompressing data chunk", e);
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ protected long chunkIndexFor(int docId) {
+ long low = 0;
+ long high = (_metadata.size() / METADATA_ENTRY_SIZE) - 1;
+ while (low <= high) {
+ long mid = (low + high) >>> 1;
+ long position = mid * METADATA_ENTRY_SIZE;
+ int midDocId = _metadata.getInt(position) & 0x7FFFFFFF;
+ if (midDocId < docId) {
+ low = mid + 1;
+ } else if (midDocId > docId) {
+ high = mid - 1;
+ } else {
+ return position;
+ }
+ }
+ return (low - 1) * METADATA_ENTRY_SIZE;
+ }
+
+ protected abstract byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException;
+
+ protected abstract byte[] readSmallUncompressedValue(int docId);
+
+ private byte[] decompressAndRead(int docId)
+ throws IOException {
+ long metadataEntry = chunkIndexFor(docId);
+ int info = _metadata.getInt(metadataEntry);
+ _docIdOffset = info & 0x7FFFFFFF;
+ _regularChunk = _docIdOffset == info;
+ long offset = _metadata.getInt(metadataEntry + Integer.BYTES) & 0xFFFFFFFFL;
+ long limit;
+ if (_metadata.size() - METADATA_ENTRY_SIZE > metadataEntry) {
+ _nextDocIdOffset = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE) & 0x7FFFFFFF;
+ limit = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE + Integer.BYTES) & 0xFFFFFFFFL;
+ } else {
+ _nextDocIdOffset = Integer.MAX_VALUE;
+ limit = _chunks.size();
+ }
+ return processChunkAndReadFirstValue(docId, offset, limit);
+ }
+ }
+
+ private static final class UncompressedReaderContext extends ReaderContext {
+
+ private ByteBuffer _chunk;
+
+ UncompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ super(chunks, metadata);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit) {
+ _chunk = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ if (!_regularChunk) {
+ return readHugeValue();
+ }
+ _numDocsInCurrentChunk = _chunk.getInt(0);
+ return readSmallUncompressedValue(docId);
+ }
+
+ private byte[] readHugeValue() {
+ byte[] value = new byte[_chunk.capacity()];
+ _chunk.get(value);
+ return value;
+ }
+
+ @Override
+ protected byte[] readSmallUncompressedValue(int docId) {
+ int index = docId - _docIdOffset;
+ int offset = _chunk.getInt((index + 1) * Integer.BYTES);
+ int nextOffset = index == _numDocsInCurrentChunk - 1
+ ? _chunk.limit()
+ : _chunk.getInt((index + 2) * Integer.BYTES);
+ ByteBuffer view = _chunk.duplicate();
+ view.position(offset);
+ view.order(ByteOrder.LITTLE_ENDIAN);
+ byte[] bytes = new byte[nextOffset - offset];
+ view.get(bytes);
+ return bytes;
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+ }
+
+ private static final class CompressedReaderContext extends ReaderContext {
+
+ private final ByteBuffer _decompressedBuffer;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ CompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks, ChunkDecompressor chunkDecompressor,
+ ChunkCompressionType chunkCompressionType, int targetChunkSize) {
+ super(metadata, chunks);
+ _chunkDecompressor = chunkDecompressor;
+ _chunkCompressionType = chunkCompressionType;
+ _decompressedBuffer = ByteBuffer.allocateDirect(targetChunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException {
+ _decompressedBuffer.clear();
+ ByteBuffer compressed = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ int decompressedLength = _chunkDecompressor.decompressedLength(compressed);
+ if (_regularChunk) {
+ _chunkDecompressor.decompress(compressed, _decompressedBuffer);
+ _numDocsInCurrentChunk = _decompressedBuffer.getInt(0);
+ return readSmallUncompressedValue(docId);
+ }
+ // huge value, no benefit from buffering, return the whole thing
+ return readHugeCompressedValue(compressed, decompressedLength);
+ }
+
+ @Override
+ protected byte[] readSmallUncompressedValue(int docId) {
+ int index = docId - _docIdOffset;
+ int offset = _decompressedBuffer.getInt((index + 1) * Integer.BYTES);
+ int nextOffset = index == _numDocsInCurrentChunk - 1
+ ? _decompressedBuffer.limit()
+ : _decompressedBuffer.getInt((index + 2) * Integer.BYTES);
+ ByteBuffer view = _decompressedBuffer.duplicate();
Review comment:
Since it is always single-threaded, we can directly read from the `_chunk` without duplicating it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743358730
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
Review comment:
I think this should be `>`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
Review comment:
Should this also be `>`?
Suggest adding some comments on why do we need to reserve 4 extra bytes (I assume it is for the `numValuesInChunk` header)
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
Review comment:
(minor) Suggest adding parentheses for readability
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
Review comment:
Should this be:
```suggestion
source.position(copyFrom).limit(copyFrom + valueLengths[i]);
```
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while compressing/writing data chunk", e);
+ throw new RuntimeException(e);
+ } finally {
+ CleanerUtil.cleanQuietly(target);
+ }
+ }
+
+ private void clearChunkBuffer() {
+ _chunkBuffer.clear();
+ _chunkBuffer.position(Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ flushChunk();
+ // write out where the chunks start into slot reserved at offset 12
+ _output.seek(3 * Integer.BYTES);
+ _output.writeInt(_metadataSize);
+ _output.seek(_metadataSize);
+ _dataChannel.truncate(_chunkOffset);
+ _output.setLength(_metadataSize + _chunkOffset);
+ long total = _chunkOffset;
+ long position = 0;
+ while (total > 0) {
+ long transferred = _dataChannel.transferTo(position, total, _output.getChannel());
+ total -= transferred;
+ position += transferred;
+ }
+ _dataChannel.close();
+ _output.close();
+ FileUtils.deleteQuietly(_dataBuffer);
Review comment:
Should also release the `_chunkBuffer`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
Review comment:
For a write method, might be better to throw the `IOException` and expect the caller to handle it
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
Review comment:
Why do we need to allocate a separate direct buffer for these 2 compression types? Some comments would be good
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkWriter.java
##########
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import java.io.Closeable;
+
+
+public interface VarByteChunkWriter extends Closeable {
+ void putString(String value);
Review comment:
As a writer interface, I feel it might be better to allow these 2 methods to throw `IOException`
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
Review comment:
The offsets can be updated along with the value copy, since we need to position the `_chunkBuffer` at the final offset
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743438340
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
+ flushChunk();
+ if (sizeRequired >= _chunkBuffer.capacity() - Integer.BYTES) {
+ writeHugeChunk(bytes);
+ return;
+ }
+ }
+ _chunkBuffer.putInt(bytes.length);
+ _chunkBuffer.put(bytes);
+ _nextDocId++;
+ }
+
+ private void writeHugeChunk(byte[] bytes) {
+ // huge values where the bytes and their length prefix don't fit in to the remainder of the buffer after the prefix
+ // for the number of documents in a regular chunk are written as a single value without metadata, and these chunks
+ // are detected by marking the MSB in the doc id offset
+ final ByteBuffer buffer;
+ if (_chunkCompressor.compressionType() == ChunkCompressionType.SNAPPY
+ || _chunkCompressor.compressionType() == ChunkCompressionType.ZSTANDARD) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ buffer.put(bytes);
+ buffer.flip();
+ } else {
+ // cast for JDK8 javac compatibility
+ buffer = (ByteBuffer) ByteBuffer.wrap(bytes);
+ }
+ try {
+ _nextDocId++;
+ write(buffer, true);
+ } finally {
+ CleanerUtil.cleanQuietly(buffer);
+ }
+ }
+
+ private void flushChunk() {
+ if (_nextDocId > _docIdOffset) {
+ writeChunk();
+ }
+ }
+
+ private void writeChunk() {
+ /*
+ This method translates from the current state of the buffer, assuming there are 3 values of lengths a,b, and c:
+ [-][a][a bytes][b][b bytes][c][c bytes]
+ to:
+ [3][16][a+16][a+b+16][a bytes][b bytes][c bytes]
+ [------16-bytes-----][----a+b+c bytes----------]
+ */
+ int numDocs = _nextDocId - _docIdOffset;
+ _chunkBuffer.putInt(0, numDocs);
+ // collect lengths
+ int[] valueLengths = new int[numDocs];
+ int[] offsets = new int[numDocs];
+ int offset = Integer.BYTES;
+ for (int i = 0; i < numDocs; i++) {
+ offsets[i] = offset;
+ int size = _chunkBuffer.getInt(offset);
+ valueLengths[i] = size;
+ offset += size + Integer.BYTES;
+ }
+ // now iterate backwards shifting variable length content backwards to make space for prefixes at the start
+ // this pays for itself by allowing random access to readers
+ int limit = _chunkBuffer.position();
+ int accumulatedOffset = Integer.BYTES;
+ for (int i = numDocs - 2; i >= 0; i--) {
+ ByteBuffer source = _chunkBuffer.duplicate();
+ int copyFrom = offsets[i] + Integer.BYTES;
+ source.position(offsets[i]).limit(copyFrom + valueLengths[i]);
+ _chunkBuffer.position(offsets[i] + accumulatedOffset);
+ _chunkBuffer.put(source.slice());
+ accumulatedOffset += Integer.BYTES;
+ }
+ // compute byte offsets of each string from lengths
+ int metadataOffset = Integer.BYTES * (numDocs + 1);
+ offsets[0] = metadataOffset;
+ int cumulativeLength = valueLengths[0];
+ for (int i = 1; i < offsets.length; i++) {
+ offsets[i] = metadataOffset + cumulativeLength;
+ cumulativeLength += valueLengths[i];
+ }
+ // write the lengths into the space created at the front
+ for (int i = 0; i < offsets.length; i++) {
+ _chunkBuffer.putInt(Integer.BYTES * (i + 1), offsets[i]);
+ }
+ _chunkBuffer.position(0);
+ _chunkBuffer.limit(limit);
+ write(_chunkBuffer, false);
+ clearChunkBuffer();
+ }
+
+ private void write(ByteBuffer buffer, boolean huge) {
+ int maxCompressedSize = _chunkCompressor.maxCompressedSize(buffer.limit());
+ ByteBuffer target = null;
+ try {
+ target = _dataChannel.map(FileChannel.MapMode.READ_WRITE, _chunkOffset, maxCompressedSize)
+ .order(ByteOrder.LITTLE_ENDIAN);
+ int compressedSize = _chunkCompressor.compress(buffer, target);
+ // reverse bytes here because the file writes BE and we want to read the metadata LE
+ _output.writeInt(Integer.reverseBytes(_docIdOffset | (huge ? 0x80000000 : 0)));
+ _output.writeInt(Integer.reverseBytes((int) (_chunkOffset & 0xFFFFFFFFL)));
+ _metadataSize += 8;
+ _chunkOffset += compressedSize;
+ _docIdOffset = _nextDocId;
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while compressing/writing data chunk", e);
+ throw new RuntimeException(e);
+ } finally {
+ CleanerUtil.cleanQuietly(target);
+ }
+ }
+
+ private void clearChunkBuffer() {
+ _chunkBuffer.clear();
+ _chunkBuffer.position(Integer.BYTES);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ flushChunk();
+ // write out where the chunks start into slot reserved at offset 12
+ _output.seek(3 * Integer.BYTES);
+ _output.writeInt(_metadataSize);
+ _output.seek(_metadataSize);
+ _dataChannel.truncate(_chunkOffset);
+ _output.setLength(_metadataSize + _chunkOffset);
+ long total = _chunkOffset;
+ long position = 0;
+ while (total > 0) {
+ long transferred = _dataChannel.transferTo(position, total, _output.getChannel());
+ total -= transferred;
+ position += transferred;
+ }
+ _dataChannel.close();
+ _output.close();
+ FileUtils.deleteQuietly(_dataBuffer);
Review comment:
Good catch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r743438666
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/io/writer/impl/VarByteChunkSVForwardIndexWriterV4.java
##########
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.io.writer.impl;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkCompressor;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Class to write out variable length bytes into a single column.
+ *
+ *
+ * Only sequential writes are supported.
+ */
+@NotThreadSafe
+public class VarByteChunkSVForwardIndexWriterV4 implements VarByteChunkWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexWriterV4.class);
+
+ public static final int VERSION = 4;
+
+ private static final String DATA_BUFFER_SUFFIX = ".buf";
+
+ private final File _dataBuffer;
+ private final RandomAccessFile _output;
+ private final FileChannel _dataChannel;
+ private final ByteBuffer _chunkBuffer;
+ private final ChunkCompressor _chunkCompressor;
+
+ private int _docIdOffset = 0;
+ private int _nextDocId = 0;
+ private int _metadataSize = 0;
+ private long _chunkOffset = 0;
+
+ public VarByteChunkSVForwardIndexWriterV4(File file, ChunkCompressionType compressionType, int chunkSize)
+ throws IOException {
+ _dataBuffer = new File(file.getName() + DATA_BUFFER_SUFFIX);
+ _output = new RandomAccessFile(file, "rw");
+ _dataChannel = new RandomAccessFile(_dataBuffer, "rw").getChannel();
+ _chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType, true);
+ _chunkBuffer = ByteBuffer.allocateDirect(chunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ // reserve space for numDocs
+ _chunkBuffer.position(Integer.BYTES);
+ writeHeader(_chunkCompressor.compressionType(), chunkSize);
+ }
+
+ private void writeHeader(ChunkCompressionType compressionType, int targetDecompressedChunkSize)
+ throws IOException {
+ // keep metadata BE for backwards compatibility
+ // (e.g. the version needs to be read by a factory which assumes BE)
+ _output.writeInt(VERSION);
+ _output.writeInt(targetDecompressedChunkSize);
+ _output.writeInt(compressionType.getValue());
+ // reserve a slot to write the data offset into
+ _output.writeInt(0);
+ _metadataSize += 4 * Integer.BYTES;
+ }
+
+ @Override
+ public void putString(String string) {
+ putBytes(string.getBytes(StandardCharsets.UTF_8));
+ }
+
+ @Override
+ public void putBytes(byte[] bytes) {
+ Preconditions.checkState(_chunkOffset < 1L << 32, "exceeded 4GB of compressed chunks");
+ int sizeRequired = Integer.BYTES + bytes.length;
+ if (_chunkBuffer.position() >= _chunkBuffer.capacity() - sizeRequired) {
Review comment:
I agree
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org
[GitHub] [pinot] richardstartin commented on a change in pull request #7661: implement size balanced V4 raw chunk format
Posted by GitBox <gi...@apache.org>.
richardstartin commented on a change in pull request #7661:
URL: https://github.com/apache/pinot/pull/7661#discussion_r745501080
##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkSVForwardIndexReaderV4.java
##########
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.readers.forward;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.io.compression.ChunkCompressorFactory;
+import org.apache.pinot.segment.local.io.writer.impl.VarByteChunkSVForwardIndexWriterV4;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.compression.ChunkDecompressor;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
+import org.apache.pinot.segment.spi.memory.CleanerUtil;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class VarByteChunkSVForwardIndexReaderV4
+ implements ForwardIndexReader<VarByteChunkSVForwardIndexReaderV4.ReaderContext> {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(VarByteChunkSVForwardIndexReaderV4.class);
+
+ private static final int METADATA_ENTRY_SIZE = 8;
+
+ private final FieldSpec.DataType _valueType;
+ private final int _targetDecompressedChunkSize;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ private final PinotDataBuffer _metadata;
+ private final PinotDataBuffer _chunks;
+
+ public VarByteChunkSVForwardIndexReaderV4(PinotDataBuffer dataBuffer, FieldSpec.DataType valueType) {
+ if (dataBuffer.getInt(0) < VarByteChunkSVForwardIndexWriterV4.VERSION) {
+ throw new IllegalStateException("version " + dataBuffer.getInt(0) + " < "
+ + VarByteChunkSVForwardIndexWriterV4.VERSION);
+ }
+ _valueType = valueType;
+ _targetDecompressedChunkSize = dataBuffer.getInt(4);
+ _chunkCompressionType = ChunkCompressionType.valueOf(dataBuffer.getInt(8));
+ _chunkDecompressor = ChunkCompressorFactory.getDecompressor(_chunkCompressionType);
+ int chunksOffset = dataBuffer.getInt(12);
+ // the file has a BE header for compatability reasons (version selection) but the content is LE
+ _metadata = dataBuffer.view(16, chunksOffset, ByteOrder.LITTLE_ENDIAN);
+ _chunks = dataBuffer.view(chunksOffset, dataBuffer.size(), ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ public boolean isDictionaryEncoded() {
+ return false;
+ }
+
+ @Override
+ public boolean isSingleValue() {
+ return true;
+ }
+
+ @Override
+ public FieldSpec.DataType getValueType() {
+ return _valueType;
+ }
+
+ @Override
+ public String getString(int docId, ReaderContext context) {
+ return new String(context.getValue(docId), StandardCharsets.UTF_8);
+ }
+
+ @Override
+ public byte[] getBytes(int docId, ReaderContext context) {
+ return context.getValue(docId);
+ }
+
+ @Nullable
+ @Override
+ public ReaderContext createContext() {
+ return _chunkCompressionType == ChunkCompressionType.PASS_THROUGH
+ ? new UncompressedReaderContext(_chunks, _metadata)
+ : new CompressedReaderContext(_metadata, _chunks, _chunkDecompressor, _chunkCompressionType,
+ _targetDecompressedChunkSize);
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+
+ public static abstract class ReaderContext implements ForwardIndexReaderContext {
+
+ protected final PinotDataBuffer _chunks;
+ protected final PinotDataBuffer _metadata;
+ protected int _docIdOffset;
+ protected int _nextDocIdOffset;
+ protected boolean _regularChunk;
+ protected int _numDocsInCurrentChunk;
+
+ protected ReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ _chunks = chunks;
+ _metadata = metadata;
+ }
+
+ public byte[] getValue(int docId) {
+ if (docId >= _docIdOffset && docId < _nextDocIdOffset) {
+ return readSmallUncompressedValue(docId);
+ } else {
+ try {
+ return decompressAndRead(docId);
+ } catch (IOException e) {
+ LOGGER.error("Exception caught while decompressing data chunk", e);
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ protected long chunkIndexFor(int docId) {
+ long low = 0;
+ long high = (_metadata.size() / METADATA_ENTRY_SIZE) - 1;
+ while (low <= high) {
+ long mid = (low + high) >>> 1;
+ long position = mid * METADATA_ENTRY_SIZE;
+ int midDocId = _metadata.getInt(position) & 0x7FFFFFFF;
+ if (midDocId < docId) {
+ low = mid + 1;
+ } else if (midDocId > docId) {
+ high = mid - 1;
+ } else {
+ return position;
+ }
+ }
+ return (low - 1) * METADATA_ENTRY_SIZE;
+ }
+
+ protected abstract byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException;
+
+ protected abstract byte[] readSmallUncompressedValue(int docId);
+
+ private byte[] decompressAndRead(int docId)
+ throws IOException {
+ long metadataEntry = chunkIndexFor(docId);
+ int info = _metadata.getInt(metadataEntry);
+ _docIdOffset = info & 0x7FFFFFFF;
+ _regularChunk = _docIdOffset == info;
+ long offset = _metadata.getInt(metadataEntry + Integer.BYTES) & 0xFFFFFFFFL;
+ long limit;
+ if (_metadata.size() - METADATA_ENTRY_SIZE > metadataEntry) {
+ _nextDocIdOffset = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE) & 0x7FFFFFFF;
+ limit = _metadata.getInt(metadataEntry + METADATA_ENTRY_SIZE + Integer.BYTES) & 0xFFFFFFFFL;
+ } else {
+ _nextDocIdOffset = Integer.MAX_VALUE;
+ limit = _chunks.size();
+ }
+ return processChunkAndReadFirstValue(docId, offset, limit);
+ }
+ }
+
+ private static final class UncompressedReaderContext extends ReaderContext {
+
+ private ByteBuffer _chunk;
+
+ UncompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks) {
+ super(chunks, metadata);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit) {
+ _chunk = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ if (!_regularChunk) {
+ return readHugeValue();
+ }
+ _numDocsInCurrentChunk = _chunk.getInt(0);
+ return readSmallUncompressedValue(docId);
+ }
+
+ private byte[] readHugeValue() {
+ byte[] value = new byte[_chunk.capacity()];
+ _chunk.get(value);
+ return value;
+ }
+
+ @Override
+ protected byte[] readSmallUncompressedValue(int docId) {
+ int index = docId - _docIdOffset;
+ int offset = _chunk.getInt((index + 1) * Integer.BYTES);
+ int nextOffset = index == _numDocsInCurrentChunk - 1
+ ? _chunk.limit()
+ : _chunk.getInt((index + 2) * Integer.BYTES);
+ ByteBuffer view = _chunk.duplicate();
+ view.position(offset);
+ view.order(ByteOrder.LITTLE_ENDIAN);
+ byte[] bytes = new byte[nextOffset - offset];
+ view.get(bytes);
+ return bytes;
+ }
+
+ @Override
+ public void close()
+ throws IOException {
+ }
+ }
+
+ private static final class CompressedReaderContext extends ReaderContext {
+
+ private final ByteBuffer _decompressedBuffer;
+ private final ChunkDecompressor _chunkDecompressor;
+ private final ChunkCompressionType _chunkCompressionType;
+
+ CompressedReaderContext(PinotDataBuffer metadata, PinotDataBuffer chunks, ChunkDecompressor chunkDecompressor,
+ ChunkCompressionType chunkCompressionType, int targetChunkSize) {
+ super(metadata, chunks);
+ _chunkDecompressor = chunkDecompressor;
+ _chunkCompressionType = chunkCompressionType;
+ _decompressedBuffer = ByteBuffer.allocateDirect(targetChunkSize).order(ByteOrder.LITTLE_ENDIAN);
+ }
+
+ @Override
+ protected byte[] processChunkAndReadFirstValue(int docId, long offset, long limit)
+ throws IOException {
+ _decompressedBuffer.clear();
+ ByteBuffer compressed = _chunks.toDirectByteBuffer(offset, (int) (limit - offset));
+ int decompressedLength = _chunkDecompressor.decompressedLength(compressed);
Review comment:
Good catch, this is always O(1) but it's not needed for the common path so we should avoid doing it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org