You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by jackylk <gi...@git.apache.org> on 2018/04/13 10:04:40 UTC

[GitHub] carbondata pull request #2149: [CARBONDATA-2325]Page level uncompress and Im...

Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2149#discussion_r181342780
  
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimensionDataChunkStore.java ---
    @@ -78,70 +88,96 @@ public UnsafeVariableLengthDimensionDataChunkStore(long totalSize, boolean isInv
     
         // start position will be used to store the current data position
         int startOffset = 0;
    -    // position from where offsets will start
    -    long pointerOffsets = this.dataPointersOffsets;
         // as first position will be start from 2 byte as data is stored first in the memory block
         // we need to skip first two bytes this is because first two bytes will be length of the data
         // which we have to skip
    -    CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
    -        dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
    -        CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
    -    // incrementing the pointers as first value is already filled and as we are storing as int
    -    // we need to increment the 4 bytes to set the position of the next value to set
    -    pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
    +    int [] dataOffsets = new int[numberOfRows];
    +    dataOffsets[0] = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
         // creating a byte buffer which will wrap the length of the row
    -    // using byte buffer as unsafe will return bytes in little-endian encoding
    -    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
    -    // store length of data
    -    byte[] length = new byte[CarbonCommonConstants.SHORT_SIZE_IN_BYTE];
    -    // as first offset is already stored, we need to start from the 2nd row in data array
    +    ByteBuffer buffer = ByteBuffer.wrap(data);
         for (int i = 1; i < numberOfRows; i++) {
    -      // first copy the length of previous row
    -      CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(),
    -          dataPageMemoryBlock.getBaseOffset() + startOffset, length, CarbonUnsafe.BYTE_ARRAY_OFFSET,
    -          CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
    -      buffer.put(length);
    -      buffer.flip();
    +      buffer.position(startOffset);
           // so current row position will be
           // previous row length + 2 bytes used for storing previous row data
    -      startOffset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE + buffer.getShort();
    +      startOffset += buffer.getShort() + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
           // as same byte buffer is used to avoid creating many byte buffer for each row
           // we need to clear the byte buffer
    -      buffer.clear();
    -      // now put the offset of current row, here we need to add 2 more bytes as current will
    -      // also have length part so we have to skip length
    -      CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
    -          dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
    -          startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
    -      // incrementing the pointers as first value is already filled and as we are storing as int
    -      // we need to increment the 4 bytes to set the position of the next value to set
    -      pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
    +      dataOffsets[i] = startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
         }
    -
    +    CarbonUnsafe.getUnsafe().copyMemory(dataOffsets, CarbonUnsafe.INT_ARRAY_OFFSET,
    +        dataPageMemoryBlock.getBaseObject(),
    +        dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets,
    +        dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE);
       }
     
       /**
        * Below method will be used to get the row based on row id passed
    -   *
    +   * Getting the row from unsafe works in below logic
    +   * 1. if inverted index is present then get the row id based on reverse inverted index
    +   * 2. get the current row id data offset
    +   * 3. if it's not a last row- get the next row offset
    +   * Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
    +   * 4. if it's last row
    +   * subtract the current row offset + 2 bytes(to skip the data length) with complete data length
        * @param rowId
        * @return row
        */
       @Override public byte[] getRow(int rowId) {
    +    // get the actual row id
    +    rowId = getRowId(rowId);
    +    // get offset of data in unsafe
    +    int currentDataOffset = getOffSet(rowId);
    +    // get the data length
    +    short length = getLength(rowId, currentDataOffset);
    +    // create data array
    +    byte[] data = new byte[length];
    +    // fill the row data
    +    fillRowInternal(length, data, currentDataOffset);
    +    return data;
    +  }
    +
    +  /**
    +   * Returns the actual row id for data
    +   * if inverted index is present then get the row id based on reverse inverted index
    +   * otherwise return the same row id
    +   * @param rowId
    +   * row id
    +   * @return actual row id
    +   */
    +  private int getRowId(int rowId) {
         // if column was explicitly sorted we need to get the rowid based inverted index reverse
         if (isExplicitSorted) {
           rowId = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
               dataPageMemoryBlock.getBaseOffset() + this.invertedIndexReverseOffset + ((long)rowId
                   * CarbonCommonConstants.INT_SIZE_IN_BYTE));
         }
    -    // now to get the row from memory block we need to do following thing
    -    // 1. first get the current offset
    -    // 2. if it's not a last row- get the next row offset
    -    // Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
    -    // else subtract the current row offset + 2 bytes(to skip the data length)
    -    // with complete data length
    -    int currentDataOffset = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
    -        dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + (rowId
    +    return rowId;
    +  }
    +
    +  /**
    +   * get data offset based on current row id
    +   * @param rowId
    +   * row id
    --- End diff --
    
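    Please move the parameter description ("row id") up onto the same line as `@param rowId`, and apply the same change in all the other places where this occurs.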


---