You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by gv...@apache.org on 2017/06/29 06:14:31 UTC

[1/2] carbondata git commit: lru object size calculation

Repository: carbondata
Updated Branches:
  refs/heads/master ac2168a98 -> c8f742d43


lru object size calculation


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/377dee94
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/377dee94
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/377dee94

Branch: refs/heads/master
Commit: 377dee94780a4fe6073c855cc980ac234a576bf6
Parents: ac2168a
Author: Raghunandan S <ra...@gmail.com>
Authored: Wed Jun 14 21:01:07 2017 +0530
Committer: sraghunandan <ca...@gmail.com>
Committed: Thu Jun 29 01:59:21 2017 +0530

----------------------------------------------------------------------
 .../carbondata/core/cache/CarbonLRUCache.java   | 29 +++++++
 .../AbstractColumnDictionaryInfo.java           | 15 +++-
 .../dictionary/AbstractDictionaryCache.java     | 89 +++++++++++++++++---
 .../core/cache/dictionary/DictionaryInfo.java   | 12 +++
 .../dictionary/ForwardDictionaryCache.java      | 42 +++++++++
 .../dictionary/ReverseDictionaryCache.java      | 54 ++++++++++++
 .../datastore/AbstractBlockIndexStoreCache.java |  4 +-
 .../core/datastore/SegmentTaskIndexStore.java   | 19 +++--
 .../reader/CarbonDictionaryMetadataReader.java  | 13 +++
 .../CarbonDictionaryMetadataReaderImpl.java     | 17 ++++
 .../core/util/ObjectSizeCalculator.java         | 71 ++++++++++++++++
 .../dictionary/ReverseDictionaryCacheTest.java  |  2 +-
 12 files changed, 347 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java b/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
index 23dd814..03838a2 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
@@ -199,6 +199,35 @@ public final class CarbonLRUCache {
   }
 
   /**
+   * Checks whether requiredSize can be made available in the LRU cache; no entry is put here.
+   * @param columnIdentifier key of the entry that is to be added, used here only for logging
+   * @param requiredSize size handed to freeMemorySizeForAddingCache for the availability check
+   * @return true if isLRUCacheSizeConfigured() is false or the size check passes, else false
+   */
+  public boolean tryPut(String columnIdentifier, long requiredSize) {
+    if (LOGGER.isDebugEnabled()) {
+      LOGGER.debug("checking Required size for entry " + columnIdentifier + " :: " + requiredSize
+          + " Current cache size :: " + currentSize);
+    }
+    boolean columnKeyCanBeAdded = false;
+    if (isLRUCacheSizeConfigured()) {
+      synchronized (lruCacheMap) {
+        if (freeMemorySizeForAddingCache(requiredSize)) {
+          columnKeyCanBeAdded = true;
+        } else {
+          LOGGER.error(
+              "Size check failed.Size not available. Entry cannot be added to lru cache :: "
+                  + columnIdentifier + " .Required Size = " + requiredSize + " Size available " + (
+                  lruCacheMemorySize - currentSize));
+        }
+      }
+    } else {
+      columnKeyCanBeAdded = true;
+    }
+    return columnKeyCanBeAdded;
+  }
+
+  /**
    * The method will add the cache entry to LRU cache map
    *
    * @param columnIdentifier

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
index 18f4885..7e8a1c8 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
@@ -56,6 +56,11 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo {
   protected long offsetTillFileIsRead;
 
   /**
+   * memory size of this object. It is stored because calculating it every time is costly
+   */
+  protected long memorySize;
+
+  /**
    * length of dictionary metadata file
    */
   private long dictionaryMetaFileLength;
@@ -91,7 +96,11 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo {
    * @return
    */
   @Override public long getMemorySize() {
-    return offsetTillFileIsRead;
+    return memorySize;
+  }
+
+  @Override public void setMemorySize(long memorySize) {
+    this.memorySize = memorySize;
   }
 
   /**
@@ -131,6 +140,10 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo {
     this.offsetTillFileIsRead = offsetTillFileIsRead;
   }
 
+  @Override public long getOffsetTillFileIsRead() {
+    return offsetTillFileIsRead;
+  }
+
   /**
    * This method will update the timestamp of a file if a file is modified
    * like in case of incremental load

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
index d7c25f1..f0b8c78 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
@@ -32,6 +32,7 @@ import org.apache.carbondata.core.service.CarbonCommonFactory;
 import org.apache.carbondata.core.service.DictionaryService;
 import org.apache.carbondata.core.service.PathService;
 import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
 /**
@@ -107,6 +108,34 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
   }
 
   /**
+   * Reads the dictionary meta chunk selected by offsetRead and returns its max surrogate key.
+   * @param dictionaryColumnUniqueIdentifier provides table and column identifier for the reader
+   * @param offsetRead offset passed to readEntryOfDictionaryMetaChunk to select the chunk
+   * @return max surrogate key of the chunk that was read
+   * @throws IOException propagated from the metadata read; the reader is closed in any case
+   */
+  protected long getNumRecordsInCarbonDictionaryColumnMetaChunk(
+          DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, long offsetRead)
+          throws IOException {
+    DictionaryService dictService = CarbonCommonFactory.getDictionaryService();
+    CarbonDictionaryMetadataReader columnMetadataReaderImpl = dictService
+            .getDictionaryMetadataReader(
+                    dictionaryColumnUniqueIdentifier.getCarbonTableIdentifier(),
+                    dictionaryColumnUniqueIdentifier.getColumnIdentifier(), carbonStorePath);
+
+    CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk = null;
+    // read metadata file
+    try {
+      carbonDictionaryColumnMetaChunk =
+              columnMetadataReaderImpl.readEntryOfDictionaryMetaChunk(offsetRead);
+    } finally {
+      // close the metadata reader
+      columnMetadataReaderImpl.close();
+    }
+    return carbonDictionaryColumnMetaChunk.getMax_surrogate_key();
+  }
+
+  /**
    * This method will validate dictionary metadata file for any modification
    *
    * @param carbonFile
@@ -141,6 +170,12 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
     return dictFile;
   }
 
+  protected long getSortIndexSize(long numOfRecords) {
+    // sort index and reverse sort index each hold one entry per record (hence * 2); an entry is
+    // sized as an estimated Integer object (default 16), plus 32 in total for both array headers
+    return numOfRecords * ObjectSizeCalculator.estimate(new Integer(0), 16) * 2 + 32;
+  }
+
   /**
    * This method will get the value for the given key. If value does not exist
    * for the given key, it will check and load the value.
@@ -176,28 +211,37 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
         if (dictionaryMetaFileModified) {
           CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk =
               readLastChunkFromDictionaryMetadataFile(dictionaryColumnUniqueIdentifier);
-          // required size will be size total size of file - offset till file is
-          // already read
-          long requiredSize =
-              carbonDictionaryColumnMetaChunk.getEnd_offset() - dictionaryInfo.getMemorySize();
+
+          long requiredSize = getEstimatedDictionarySize(dictionaryInfo,
+              carbonDictionaryColumnMetaChunk,
+              dictionaryColumnUniqueIdentifier, loadSortIndex);
+
           if (requiredSize > 0) {
-            boolean columnAddedToLRUCache =
-                carbonLRUCache.put(lruCacheKey, dictionaryInfo, requiredSize);
-            // if column is successfully added to lru cache then only load the
+            dictionaryInfo.setMemorySize(requiredSize);
+            boolean colCanBeAddedToLRUCache =
+                    carbonLRUCache.tryPut(lruCacheKey, requiredSize);
+            // if column can be added to lru cache then only load the
             // dictionary data
-            if (columnAddedToLRUCache) {
+            if (colCanBeAddedToLRUCache) {
               // load dictionary data
               loadDictionaryData(dictionaryInfo, dictionaryColumnUniqueIdentifier,
-                  dictionaryInfo.getMemorySize(), carbonDictionaryColumnMetaChunk.getEnd_offset(),
-                  loadSortIndex);
+                      dictionaryInfo.getOffsetTillFileIsRead(),
+                      carbonDictionaryColumnMetaChunk.getEnd_offset(),
+                      loadSortIndex);
               // set the end offset till where file is read
               dictionaryInfo
-                  .setOffsetTillFileIsRead(carbonDictionaryColumnMetaChunk.getEnd_offset());
+                      .setOffsetTillFileIsRead(carbonDictionaryColumnMetaChunk.getEnd_offset());
+              long updateRequiredSize = ObjectSizeCalculator.estimate(dictionaryInfo, requiredSize);
+              dictionaryInfo.setMemorySize(updateRequiredSize);
+              if (!carbonLRUCache.put(lruCacheKey, dictionaryInfo, updateRequiredSize)) {
+                throw new DictionaryBuilderException(
+                        "Cannot load dictionary into memory. Not enough memory available");
+              }
               dictionaryInfo.setFileTimeStamp(carbonFile.getLastModifiedTime());
               dictionaryInfo.setDictionaryMetaFileLength(carbonFile.getSize());
             } else {
               throw new DictionaryBuilderException(
-                  "Cannot load dictionary into memory. Not enough memory available");
+                      "Cannot load dictionary into memory. Not enough memory available");
             }
           }
         }
@@ -262,4 +306,25 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
       dictionary.clear();
     }
   }
+
+  /**
+   * Estimates the probable size of a dictionary in the Java heap.
+   * The value is used to check whether the dictionary can be added to the LRU cache,
+   * which avoids loading dictionary files that would not fit into the cache.
+   * The estimate can be smaller or larger than the actual size.
+   * This base implementation always returns 0; ForwardDictionaryCache and
+   * ReverseDictionaryCache override it.
+   * @param dictionaryInfo dictionary info object; unused in this base implementation
+   * @param carbonDictionaryColumnMetaChunk meta chunk; unused in this base implementation
+   * @param dictionaryColumnUniqueIdentifier column identifier; unused in this base implementation
+   * @param readSortIndexSize unused in this base implementation
+   * @return 0
+   * @throws IOException never thrown here; declared so that overriding methods may throw it
+   */
+  protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo,
+      CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk,
+      DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean
+      readSortIndexSize) throws IOException {
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
index a14c3d6..0fbb4bb 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
@@ -41,6 +41,18 @@ public interface DictionaryInfo extends Cacheable, Dictionary {
   void setOffsetTillFileIsRead(long offsetTillFileIsRead);
 
   /**
+   * Returns the offset till which the file is read
+   * @return offset till the file is read
+   */
+  long getOffsetTillFileIsRead();
+
+  /**
+   * Sets the memory size of this object after it is loaded into memory
+   * @param memorySize memory size of this object
+   */
+  void setMemorySize(long memorySize);
+
+  /**
    * This method will update the timestamp of a file if a file is modified
    * like in case of incremental load
    *

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
index e46f3f5..b23bd49 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
@@ -32,6 +32,9 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.cache.CacheType;
 import org.apache.carbondata.core.cache.CarbonLRUCache;
+import org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
 
 /**
  * This class implements methods to create dictionary cache which will hold
@@ -49,6 +52,11 @@ public class ForwardDictionaryCache<K extends
   private static final Map<DictionaryColumnUniqueIdentifier, Object> DICTIONARY_LOCK_OBJECT =
       new HashMap<>();
 
+  private static final long sizeOfEmptyDictChunks =
+      ObjectSizeCalculator.estimate(new ArrayList<byte[]>(CarbonUtil.getDictionaryChunkSize()), 16);
+
+  private static final long byteArraySize = ObjectSizeCalculator.estimate(new byte[0], 16);
+
   /**
    * @param carbonStorePath
    * @param carbonLRUCache
@@ -231,4 +239,38 @@ public class ForwardDictionaryCache<K extends
       cacheable.clear();
     }
   }
+
+  @Override protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo,
+      CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk,
+      DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean
+      readSortIndexSize) throws IOException {
+    // initial required size is the part of the file not yet read: end offset of the given
+    // meta chunk minus the offset till which the file is already read
+    long requiredSize =
+        carbonDictionaryColumnMetaChunk.getEnd_offset() -
+            dictionaryInfo.getOffsetTillFileIsRead();
+
+    long numOfRecords = dictionaryInfo.getOffsetTillFileIsRead() == 0 ?
+        carbonDictionaryColumnMetaChunk.getMax_surrogate_key() :
+        carbonDictionaryColumnMetaChunk.getMax_surrogate_key()
+            - getNumRecordsInCarbonDictionaryColumnMetaChunk(
+            dictionaryColumnUniqueIdentifier,
+            dictionaryInfo.getOffsetTillFileIsRead());
+
+    if (numOfRecords > 0) {
+      long avgRecordsSize = requiredSize / numOfRecords; // integer division, remainder dropped
+      long bytesPerRecord = (long)Math.ceil(avgRecordsSize / 8.0) * 8; // next multiple of 8
+
+      requiredSize = (bytesPerRecord + byteArraySize) * numOfRecords;
+    }
+
+    if (readSortIndexSize) {
+      // the sort index size is always calculated for all the records (max surrogate key of the
+      // given chunk), not only for the records that are newly read
+      requiredSize = requiredSize + getSortIndexSize(
+          carbonDictionaryColumnMetaChunk.getMax_surrogate_key());
+    }
+
+    return requiredSize + sizeOfEmptyDictChunks;
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
index d63d42a..28568b5 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -30,6 +31,9 @@ import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.cache.CacheType;
 import org.apache.carbondata.core.cache.CarbonLRUCache;
+import org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
 
 /**
  * This class implements methods to create dictionary cache which will hold
@@ -45,6 +49,20 @@ public class ReverseDictionaryCache<K extends DictionaryColumnUniqueIdentifier,
   private static final LogService LOGGER =
       LogServiceFactory.getLogService(ReverseDictionaryCache.class.getName());
 
+  private static final long sizeOfEmptyDictChunks =
+      ObjectSizeCalculator.estimate(new ArrayList<byte[]>(CarbonUtil.getDictionaryChunkSize()), 16);
+
+  private static final long sizeOfEmptyHashMap = ObjectSizeCalculator.estimate(new
+      ConcurrentHashMap<DictionaryByteArrayWrapper,
+          Integer>(CarbonUtil.getDictionaryChunkSize()), 16);
+
+  private static final long sizeOfHashMapNode = ObjectSizeCalculator.estimate(new
+      DictionaryByteArrayWrapper(new byte[0]), 16) +
+      ObjectSizeCalculator.estimate(new Integer(0), 16);
+
+  private static final long byteArraySize = ObjectSizeCalculator.estimate(new byte[0], 16);
+
+
   /**
    * @param carbonStorePath
    * @param carbonLRUCache
@@ -209,4 +227,40 @@ public class ReverseDictionaryCache<K extends DictionaryColumnUniqueIdentifier,
       cacheable.clear();
     }
   }
+
+  @Override protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo,
+      CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk,
+      DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean
+      readSortIndexSize) throws IOException {
+    // initial required size is the part of the file not yet read: end offset of the given
+    // meta chunk minus the offset till which the file is already read
+    long requiredSize =
+        carbonDictionaryColumnMetaChunk.getEnd_offset() -
+            dictionaryInfo.getOffsetTillFileIsRead();
+
+    long numOfRecords = dictionaryInfo.getOffsetTillFileIsRead() == 0 ?
+        carbonDictionaryColumnMetaChunk.getMax_surrogate_key() :
+        carbonDictionaryColumnMetaChunk.getMax_surrogate_key()
+            - getNumRecordsInCarbonDictionaryColumnMetaChunk(
+            dictionaryColumnUniqueIdentifier,
+            dictionaryInfo.getOffsetTillFileIsRead());
+
+    if (numOfRecords > 0) {
+      long avgRecordsSize = requiredSize / numOfRecords; // integer division, remainder dropped
+      long bytesPerRecord = (long)Math.ceil(avgRecordsSize / 8.0) * 8; // next multiple of 8
+
+      requiredSize = (bytesPerRecord + byteArraySize) * numOfRecords;
+    }
+
+    if (readSortIndexSize) {
+      // the sort index size is always calculated for all the records (max surrogate key of the
+      // given chunk), not only for the records that are newly read
+      requiredSize = requiredSize + getSortIndexSize(
+          carbonDictionaryColumnMetaChunk.getMax_surrogate_key());
+    }
+
+    requiredSize = requiredSize + (sizeOfHashMapNode * numOfRecords);
+
+    return requiredSize + sizeOfEmptyDictChunks + sizeOfEmptyHashMap;
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java b/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
index 5e8c8a3..8cfc602 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
@@ -32,6 +32,7 @@ import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
 import org.apache.carbondata.core.datastore.exception.IndexBuilderException;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
 
 /**
  * This class validate and load the B-Tree in the executor lru cache
@@ -92,13 +93,14 @@ public abstract class AbstractBlockIndexStoreCache<K, V>
     TableBlockInfo blockInfo = tableBlockUniqueIdentifier.getTableBlockInfo();
     long requiredMetaSize = CarbonUtil.calculateMetaSize(blockInfo);
     if (requiredMetaSize > 0) {
-      tableBlock.setMemorySize(requiredMetaSize);
       // load table blocks data
       // getting the data file meta data of the block
       DataFileFooter footer = CarbonUtil.readMetadatFile(blockInfo);
       footer.setBlockInfo(new BlockInfo(blockInfo));
       // building the block
       tableBlock.buildIndex(Collections.singletonList(footer));
+      requiredMetaSize = ObjectSizeCalculator.estimate(blockInfo, requiredMetaSize);
+      tableBlock.setMemorySize(requiredMetaSize);
       tableBlock.incrementAccessCount();
       boolean isTableBlockAddedToLruCache = lruCache.put(lruCacheKey, tableBlock, requiredMetaSize);
       if (!isTableBlockAddedToLruCache) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
index 734aaaf..980ea3e 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
@@ -43,6 +43,7 @@ import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
 import org.apache.carbondata.core.mutate.UpdateVO;
 import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.core.util.path.CarbonTablePath.DataFileUtil;
 
@@ -233,11 +234,10 @@ public class SegmentTaskIndexStore
                   taskIdToTableBlockInfoMap.entrySet().iterator();
               long requiredSize =
                   calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier);
-              segmentTaskIndexWrapper
-                  .setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize());
-              boolean isAddedToLruCache =
-                  lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize);
-              if (isAddedToLruCache) {
+              segmentTaskIndexWrapper.setMemorySize(requiredSize);
+              boolean canAddToLruCache =
+                  lruCache.tryPut(lruCacheKey, requiredSize);
+              if (canAddToLruCache) {
                 while (iterator.hasNext()) {
                   Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList =
                       iterator.next();
@@ -246,6 +246,15 @@ public class SegmentTaskIndexStore
                       loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(),
                           absoluteTableIdentifier));
                 }
+                long updatedRequiredSize =
+                    ObjectSizeCalculator.estimate(segmentTaskIndexWrapper, requiredSize);
+                // update the actual size of object
+                segmentTaskIndexWrapper.setMemorySize(updatedRequiredSize);
+                if (!lruCache.put(lruCacheKey, segmentTaskIndexWrapper, updatedRequiredSize)) {
+                  throw new IndexBuilderException(
+                          "Can not load the segment. No Enough space available.");
+                }
+
               } else {
                 throw new IndexBuilderException(
                     "Can not load the segment. No Enough space available.");

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
index f2e09ec..cc14187 100644
--- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
+++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
@@ -53,4 +53,17 @@ public interface CarbonDictionaryMetadataReader extends Closeable {
    * @throws IOException if an I/O error occurs
    */
   CarbonDictionaryColumnMetaChunk readLastEntryOfDictionaryMetaChunk() throws IOException;
+
+  /**
+   * This method will be used to read the dictionary meta chunk entry ending at end_Offset.
+   * Applicable scenario: when loading into the LRU cache we need to calculate the size of the
+   * object in memory; for this we need the number of records already loaded into the LRU cache,
+   * so that we can calculate the memory required for an incremental load.
+   *
+   * @param end_Offset end offset used to select the dictionary meta chunk entry
+   * @return dictionary meta chunk entry selected by the given end offset
+   * @throws IOException if an I/O error occurs
+   */
+  CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset)
+          throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
index 98c0ddb..9356974 100644
--- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
@@ -133,6 +133,23 @@ public class CarbonDictionaryMetadataReaderImpl implements CarbonDictionaryMetad
     return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
   }
 
+  @Override public CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset)
+          throws IOException {
+    ColumnDictionaryChunkMeta dictionaryChunkMeta = null; // stays null if no chunk is read
+    // open dictionary meta thrift reader
+    openThriftReader();
+    // stop at the first chunk whose end_offset is >= end_Offset; else the last chunk read is kept
+    while (dictionaryMetadataFileReader.hasNext()) {
+      // get the thrift object for dictionary chunk
+      dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
+      if (dictionaryChunkMeta.end_offset >= end_Offset) {
+        break;
+      }
+    }
+    // create a new instance of chunk meta wrapper using thrift object
+    return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
+  }
+
   /**
    * Closes this stream and releases any system resources associated
    * with it. If the stream is already closed then invoking this

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java b/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java
new file mode 100644
index 0000000..513e786
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.util;
+
+import java.lang.reflect.Method;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+
+/**
+ * This wrapper class is created so that core doesn't have a direct dependency on spark.
+ * TODO: Need to have carbon implementation if carbon needs to be used without spark
+ */
+public final class ObjectSizeCalculator {
+  /**
+   * Logger object for the class
+   */
+  private static final LogService LOGGER =
+      LogServiceFactory.getLogService(ObjectSizeCalculator.class.getName());
+
+  /**
+   * Fully qualified name of the spark class that is loaded through reflection
+   */
+  private static String className = "org.apache.spark.util.SizeEstimator";
+
+  private static Method estimateMethod = null; // looked up on the first call to estimate
+
+  private static boolean methodAccessible = true; // set to false once an invocation has failed
+
+  /**
+   * Invokes spark's SizeEstimator.estimate through reflection to compute the size of anObject.
+   * Returns defValue if the call fails; after one failure every later call returns defValue.
+   * @param anObject object whose size is to be estimated
+   * @param defValue value returned when the spark method cannot be invoked
+   * @return result of SizeEstimator.estimate, or defValue when it cannot be invoked
+   */
+  public static long estimate(Object anObject, long defValue) {
+    try {
+      if (methodAccessible) {
+        if (null == estimateMethod) {
+          estimateMethod = Class.forName(className).getMethod("estimate", Object.class);
+          estimateMethod.setAccessible(true);
+        }
+        return (Long) estimateMethod.invoke(null, anObject);
+      } else {
+        return defValue;
+      }
+    } catch (Throwable ex) {
+      // throwable is being caught as external interface is being invoked through reflection
+      // and runtime exceptions might get thrown
+      LOGGER.error(ex, "Could not access method SizeEstimator:estimate.Returning default value");
+      methodAccessible = false;
+      return defValue;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java b/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
index 628c3ff..b06fc4d 100644
--- a/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
@@ -206,7 +206,7 @@ public class ReverseDictionaryCacheTest extends AbstractDictionaryCacheTest {
     // available size limit
     new MockUp<CarbonDictionaryColumnMetaChunk>() {
       @Mock public long getEnd_offset() {
-        return 10485755L;
+        return 10445000L;
       }
     };
     columnIdentifier = columnIdentifiers[1];


[2/2] carbondata git commit: [CARBONDATA-1179] Improve Size calculation of Objects of LRU cache. This closes #1038.

Posted by gv...@apache.org.
[CARBONDATA-1179] Improve Size calculation of Objects of LRU cache. This closes #1038.


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c8f742d4
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c8f742d4
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c8f742d4

Branch: refs/heads/master
Commit: c8f742d434e29a6a9000815d9e7921462120c77d
Parents: ac2168a 377dee9
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Thu Jun 29 11:41:35 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 11:41:35 2017 +0530

----------------------------------------------------------------------
 .../carbondata/core/cache/CarbonLRUCache.java   | 29 +++++++
 .../AbstractColumnDictionaryInfo.java           | 15 +++-
 .../dictionary/AbstractDictionaryCache.java     | 89 +++++++++++++++++---
 .../core/cache/dictionary/DictionaryInfo.java   | 12 +++
 .../dictionary/ForwardDictionaryCache.java      | 42 +++++++++
 .../dictionary/ReverseDictionaryCache.java      | 54 ++++++++++++
 .../datastore/AbstractBlockIndexStoreCache.java |  4 +-
 .../core/datastore/SegmentTaskIndexStore.java   | 19 +++--
 .../reader/CarbonDictionaryMetadataReader.java  | 13 +++
 .../CarbonDictionaryMetadataReaderImpl.java     | 17 ++++
 .../core/util/ObjectSizeCalculator.java         | 71 ++++++++++++++++
 .../dictionary/ReverseDictionaryCacheTest.java  |  2 +-
 12 files changed, 347 insertions(+), 20 deletions(-)
----------------------------------------------------------------------