You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by jackylk <gi...@git.apache.org> on 2018/04/13 10:04:40 UTC
[GitHub] carbondata pull request #2149: [CARBONDATA-2325]Page level uncompress and Im...
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2149#discussion_r181342780
--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimensionDataChunkStore.java ---
@@ -78,70 +88,96 @@ public UnsafeVariableLengthDimensionDataChunkStore(long totalSize, boolean isInv
// start position will be used to store the current data position
int startOffset = 0;
- // position from where offsets will start
- long pointerOffsets = this.dataPointersOffsets;
// as first position will be start from 2 byte as data is stored first in the memory block
// we need to skip first two bytes this is because first two bytes will be length of the data
// which we have to skip
- CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
- CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- // incrementing the pointers as first value is already filled and as we are storing as int
- // we need to increment the 4 bytes to set the position of the next value to set
- pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+ int [] dataOffsets = new int[numberOfRows];
+ dataOffsets[0] = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
// creating a byte buffer which will wrap the length of the row
- // using byte buffer as unsafe will return bytes in little-endian encoding
- ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- // store length of data
- byte[] length = new byte[CarbonCommonConstants.SHORT_SIZE_IN_BYTE];
- // as first offset is already stored, we need to start from the 2nd row in data array
+ ByteBuffer buffer = ByteBuffer.wrap(data);
for (int i = 1; i < numberOfRows; i++) {
- // first copy the length of previous row
- CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + startOffset, length, CarbonUnsafe.BYTE_ARRAY_OFFSET,
- CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- buffer.put(length);
- buffer.flip();
+ buffer.position(startOffset);
// so current row position will be
// previous row length + 2 bytes used for storing previous row data
- startOffset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE + buffer.getShort();
+ startOffset += buffer.getShort() + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
// as same byte buffer is used to avoid creating many byte buffer for each row
// we need to clear the byte buffer
- buffer.clear();
- // now put the offset of current row, here we need to add 2 more bytes as current will
- // also have length part so we have to skip length
- CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
- startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- // incrementing the pointers as first value is already filled and as we are storing as int
- // we need to increment the 4 bytes to set the position of the next value to set
- pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+ dataOffsets[i] = startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
}
-
+ CarbonUnsafe.getUnsafe().copyMemory(dataOffsets, CarbonUnsafe.INT_ARRAY_OFFSET,
+ dataPageMemoryBlock.getBaseObject(),
+ dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets,
+ dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE);
}
/**
* Below method will be used to get the row based on row id passed
- *
+ * Getting the row from unsafe works in below logic
+ * 1. if inverted index is present then get the row id based on reverse inverted index
+ * 2. get the current row id data offset
+ * 3. if it's not a last row- get the next row offset
+ * Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
+ * 4. if it's last row
+ * subtract the current row offset + 2 bytes(to skip the data length) with complete data length
* @param rowId
* @return row
*/
@Override public byte[] getRow(int rowId) {
+ // get the actual row id
+ rowId = getRowId(rowId);
+ // get offset of data in unsafe
+ int currentDataOffset = getOffSet(rowId);
+ // get the data length
+ short length = getLength(rowId, currentDataOffset);
+ // create data array
+ byte[] data = new byte[length];
+ // fill the row data
+ fillRowInternal(length, data, currentDataOffset);
+ return data;
+ }
+
+ /**
+ * Returns the actual row id for data
+ * if inverted index is present then get the row id based on reverse inverted index
+ * otherwise return the same row id
+ * @param rowId
+ * row id
+ * @return actual row id
+ */
+ private int getRowId(int rowId) {
// if column was explicitly sorted we need to get the rowid based inverted index reverse
if (isExplicitSorted) {
rowId = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
dataPageMemoryBlock.getBaseOffset() + this.invertedIndexReverseOffset + ((long)rowId
* CarbonCommonConstants.INT_SIZE_IN_BYTE));
}
- // now to get the row from memory block we need to do following thing
- // 1. first get the current offset
- // 2. if it's not a last row- get the next row offset
- // Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
- // else subtract the current row offset + 2 bytes(to skip the data length)
- // with complete data length
- int currentDataOffset = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + (rowId
+ return rowId;
+ }
+
+ /**
+ * get data offset based on current row id
+ * @param rowId
+ * row id
--- End diff --
move it to previous line, please modify all places
---