You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by gv...@apache.org on 2017/06/29 06:14:31 UTC
[1/2] carbondata git commit: lru object size calculation
Repository: carbondata
Updated Branches:
refs/heads/master ac2168a98 -> c8f742d43
lru object size calculation
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/377dee94
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/377dee94
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/377dee94
Branch: refs/heads/master
Commit: 377dee94780a4fe6073c855cc980ac234a576bf6
Parents: ac2168a
Author: Raghunandan S <ra...@gmail.com>
Authored: Wed Jun 14 21:01:07 2017 +0530
Committer: sraghunandan <ca...@gmail.com>
Committed: Thu Jun 29 01:59:21 2017 +0530
----------------------------------------------------------------------
.../carbondata/core/cache/CarbonLRUCache.java | 29 +++++++
.../AbstractColumnDictionaryInfo.java | 15 +++-
.../dictionary/AbstractDictionaryCache.java | 89 +++++++++++++++++---
.../core/cache/dictionary/DictionaryInfo.java | 12 +++
.../dictionary/ForwardDictionaryCache.java | 42 +++++++++
.../dictionary/ReverseDictionaryCache.java | 54 ++++++++++++
.../datastore/AbstractBlockIndexStoreCache.java | 4 +-
.../core/datastore/SegmentTaskIndexStore.java | 19 +++--
.../reader/CarbonDictionaryMetadataReader.java | 13 +++
.../CarbonDictionaryMetadataReaderImpl.java | 17 ++++
.../core/util/ObjectSizeCalculator.java | 71 ++++++++++++++++
.../dictionary/ReverseDictionaryCacheTest.java | 2 +-
12 files changed, 347 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java b/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
index 23dd814..03838a2 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java
@@ -199,6 +199,35 @@ public final class CarbonLRUCache {
}
/**
+ * This method will check if required size is available in the memory
+ * @param columnIdentifier
+ * @param requiredSize
+ * @return
+ */
+ public boolean tryPut(String columnIdentifier, long requiredSize) {
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("checking Required size for entry " + columnIdentifier + " :: " + requiredSize
+ + " Current cache size :: " + currentSize);
+ }
+ boolean columnKeyCanBeAdded = false;
+ if (isLRUCacheSizeConfigured()) {
+ synchronized (lruCacheMap) {
+ if (freeMemorySizeForAddingCache(requiredSize)) {
+ columnKeyCanBeAdded = true;
+ } else {
+ LOGGER.error(
+ "Size check failed.Size not available. Entry cannot be added to lru cache :: "
+ + columnIdentifier + " .Required Size = " + requiredSize + " Size available " + (
+ lruCacheMemorySize - currentSize));
+ }
+ }
+ } else {
+ columnKeyCanBeAdded = true;
+ }
+ return columnKeyCanBeAdded;
+ }
+
+ /**
* The method will add the cache entry to LRU cache map
*
* @param columnIdentifier
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
index 18f4885..7e8a1c8 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java
@@ -56,6 +56,11 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo {
protected long offsetTillFileIsRead;
/**
+ * memory size of this object. We store it because calculating it every time is costly
+ */
+ protected long memorySize;
+
+ /**
* length of dictionary metadata file
*/
private long dictionaryMetaFileLength;
@@ -91,7 +96,11 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo {
* @return
*/
@Override public long getMemorySize() {
- return offsetTillFileIsRead;
+ return memorySize;
+ }
+
+ @Override public void setMemorySize(long memorySize) {
+ this.memorySize = memorySize;
}
/**
@@ -131,6 +140,10 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo {
this.offsetTillFileIsRead = offsetTillFileIsRead;
}
+ @Override public long getOffsetTillFileIsRead() {
+ return offsetTillFileIsRead;
+ }
+
/**
* This method will update the timestamp of a file if a file is modified
* like in case of incremental load
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
index d7c25f1..f0b8c78 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java
@@ -32,6 +32,7 @@ import org.apache.carbondata.core.service.CarbonCommonFactory;
import org.apache.carbondata.core.service.DictionaryService;
import org.apache.carbondata.core.service.PathService;
import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
import org.apache.carbondata.core.util.path.CarbonTablePath;
/**
@@ -107,6 +108,34 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
}
/**
+ * get the number of records (max surrogate key) recorded in the dictionary column meta chunk for the object already read and stored in LRU cache
+ * @param dictionaryColumnUniqueIdentifier
+ * @param offsetRead
+ * @return
+ * @throws IOException
+ */
+ protected long getNumRecordsInCarbonDictionaryColumnMetaChunk(
+ DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, long offsetRead)
+ throws IOException {
+ DictionaryService dictService = CarbonCommonFactory.getDictionaryService();
+ CarbonDictionaryMetadataReader columnMetadataReaderImpl = dictService
+ .getDictionaryMetadataReader(
+ dictionaryColumnUniqueIdentifier.getCarbonTableIdentifier(),
+ dictionaryColumnUniqueIdentifier.getColumnIdentifier(), carbonStorePath);
+
+ CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk = null;
+ // read metadata file
+ try {
+ carbonDictionaryColumnMetaChunk =
+ columnMetadataReaderImpl.readEntryOfDictionaryMetaChunk(offsetRead);
+ } finally {
+ // close the metadata reader
+ columnMetadataReaderImpl.close();
+ }
+ return carbonDictionaryColumnMetaChunk.getMax_surrogate_key();
+ }
+
+ /**
* This method will validate dictionary metadata file for any modification
*
* @param carbonFile
@@ -141,6 +170,12 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
return dictFile;
}
+ protected long getSortIndexSize(long numOfRecords) {
+ // sort index has sort index and reverse sort index,each is 4 byte integer.
+ // 32 byte is the array header of both the integer arrays
+ return numOfRecords * ObjectSizeCalculator.estimate(new Integer(0), 16) * 2 + 32;
+ }
+
/**
* This method will get the value for the given key. If value does not exist
* for the given key, it will check and load the value.
@@ -176,28 +211,37 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
if (dictionaryMetaFileModified) {
CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk =
readLastChunkFromDictionaryMetadataFile(dictionaryColumnUniqueIdentifier);
- // required size will be size total size of file - offset till file is
- // already read
- long requiredSize =
- carbonDictionaryColumnMetaChunk.getEnd_offset() - dictionaryInfo.getMemorySize();
+
+ long requiredSize = getEstimatedDictionarySize(dictionaryInfo,
+ carbonDictionaryColumnMetaChunk,
+ dictionaryColumnUniqueIdentifier, loadSortIndex);
+
if (requiredSize > 0) {
- boolean columnAddedToLRUCache =
- carbonLRUCache.put(lruCacheKey, dictionaryInfo, requiredSize);
- // if column is successfully added to lru cache then only load the
+ dictionaryInfo.setMemorySize(requiredSize);
+ boolean colCanBeAddedToLRUCache =
+ carbonLRUCache.tryPut(lruCacheKey, requiredSize);
+ // if column can be added to lru cache then only load the
// dictionary data
- if (columnAddedToLRUCache) {
+ if (colCanBeAddedToLRUCache) {
// load dictionary data
loadDictionaryData(dictionaryInfo, dictionaryColumnUniqueIdentifier,
- dictionaryInfo.getMemorySize(), carbonDictionaryColumnMetaChunk.getEnd_offset(),
- loadSortIndex);
+ dictionaryInfo.getOffsetTillFileIsRead(),
+ carbonDictionaryColumnMetaChunk.getEnd_offset(),
+ loadSortIndex);
// set the end offset till where file is read
dictionaryInfo
- .setOffsetTillFileIsRead(carbonDictionaryColumnMetaChunk.getEnd_offset());
+ .setOffsetTillFileIsRead(carbonDictionaryColumnMetaChunk.getEnd_offset());
+ long updateRequiredSize = ObjectSizeCalculator.estimate(dictionaryInfo, requiredSize);
+ dictionaryInfo.setMemorySize(updateRequiredSize);
+ if (!carbonLRUCache.put(lruCacheKey, dictionaryInfo, updateRequiredSize)) {
+ throw new DictionaryBuilderException(
+ "Cannot load dictionary into memory. Not enough memory available");
+ }
dictionaryInfo.setFileTimeStamp(carbonFile.getLastModifiedTime());
dictionaryInfo.setDictionaryMetaFileLength(carbonFile.getSize());
} else {
throw new DictionaryBuilderException(
- "Cannot load dictionary into memory. Not enough memory available");
+ "Cannot load dictionary into memory. Not enough memory available");
}
}
}
@@ -262,4 +306,25 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId
dictionary.clear();
}
}
+
+ /**
+ * Calculates the probable size of the dictionary in the Java heap.
+ * The value is used to check whether the dictionary can be added to the LRU cache.
+ * This helps to avoid unnecessary loading of dictionary files
+ * when the estimated size is more than what can fit into the LRU cache.
+ * The estimated size can be less or greater than the actual size
+ * due to Java optimizations.
+ * @param dictionaryInfo
+ * @param carbonDictionaryColumnMetaChunk
+ * @param dictionaryColumnUniqueIdentifier
+ * @param readSortIndexSize
+ * @return
+ * @throws IOException
+ */
+ protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo,
+ CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk,
+ DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean
+ readSortIndexSize) throws IOException {
+ return 0;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
index a14c3d6..0fbb4bb 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java
@@ -41,6 +41,18 @@ public interface DictionaryInfo extends Cacheable, Dictionary {
void setOffsetTillFileIsRead(long offsetTillFileIsRead);
/**
+ * offset till the file is read
+ * @return
+ */
+ long getOffsetTillFileIsRead();
+
+ /**
+ * the memory size of this object after loaded into memory
+ * @param memorySize
+ */
+ void setMemorySize(long memorySize);
+
+ /**
* This method will update the timestamp of a file if a file is modified
* like in case of incremental load
*
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
index e46f3f5..b23bd49 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java
@@ -32,6 +32,9 @@ import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.cache.CacheType;
import org.apache.carbondata.core.cache.CarbonLRUCache;
+import org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
/**
* This class implements methods to create dictionary cache which will hold
@@ -49,6 +52,11 @@ public class ForwardDictionaryCache<K extends
private static final Map<DictionaryColumnUniqueIdentifier, Object> DICTIONARY_LOCK_OBJECT =
new HashMap<>();
+ private static final long sizeOfEmptyDictChunks =
+ ObjectSizeCalculator.estimate(new ArrayList<byte[]>(CarbonUtil.getDictionaryChunkSize()), 16);
+
+ private static final long byteArraySize = ObjectSizeCalculator.estimate(new byte[0], 16);
+
/**
* @param carbonStorePath
* @param carbonLRUCache
@@ -231,4 +239,38 @@ public class ForwardDictionaryCache<K extends
cacheable.clear();
}
}
+
+ @Override protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo,
+ CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk,
+ DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean
+ readSortIndexSize) throws IOException {
+ // required size will be size total size of file - offset till file is
+ // already read
+ long requiredSize =
+ carbonDictionaryColumnMetaChunk.getEnd_offset() -
+ dictionaryInfo.getOffsetTillFileIsRead();
+
+ long numOfRecords = dictionaryInfo.getOffsetTillFileIsRead() == 0 ?
+ carbonDictionaryColumnMetaChunk.getMax_surrogate_key() :
+ carbonDictionaryColumnMetaChunk.getMax_surrogate_key()
+ - getNumRecordsInCarbonDictionaryColumnMetaChunk(
+ dictionaryColumnUniqueIdentifier,
+ dictionaryInfo.getOffsetTillFileIsRead());
+
+ if (numOfRecords > 0) {
+ long avgRecordsSize = requiredSize / numOfRecords;
+ long bytesPerRecord = (long)Math.ceil(avgRecordsSize / 8.0) * 8;
+
+ requiredSize = (bytesPerRecord + byteArraySize) * numOfRecords;
+ }
+
+ if (readSortIndexSize) {
+ // every time we are loading all the sort index files.Hence memory calculation for all
+ // the records
+ requiredSize = requiredSize + getSortIndexSize(
+ carbonDictionaryColumnMetaChunk.getMax_surrogate_key());
+ }
+
+ return requiredSize + sizeOfEmptyDictChunks;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
index d63d42a..28568b5 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@@ -30,6 +31,9 @@ import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.cache.CacheType;
import org.apache.carbondata.core.cache.CarbonLRUCache;
+import org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
/**
* This class implements methods to create dictionary cache which will hold
@@ -45,6 +49,20 @@ public class ReverseDictionaryCache<K extends DictionaryColumnUniqueIdentifier,
private static final LogService LOGGER =
LogServiceFactory.getLogService(ReverseDictionaryCache.class.getName());
+ private static final long sizeOfEmptyDictChunks =
+ ObjectSizeCalculator.estimate(new ArrayList<byte[]>(CarbonUtil.getDictionaryChunkSize()), 16);
+
+ private static final long sizeOfEmptyHashMap = ObjectSizeCalculator.estimate(new
+ ConcurrentHashMap<DictionaryByteArrayWrapper,
+ Integer>(CarbonUtil.getDictionaryChunkSize()), 16);
+
+ private static final long sizeOfHashMapNode = ObjectSizeCalculator.estimate(new
+ DictionaryByteArrayWrapper(new byte[0]), 16) +
+ ObjectSizeCalculator.estimate(new Integer(0), 16);
+
+ private static final long byteArraySize = ObjectSizeCalculator.estimate(new byte[0], 16);
+
+
/**
* @param carbonStorePath
* @param carbonLRUCache
@@ -209,4 +227,40 @@ public class ReverseDictionaryCache<K extends DictionaryColumnUniqueIdentifier,
cacheable.clear();
}
}
+
+ @Override protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo,
+ CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk,
+ DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean
+ readSortIndexSize) throws IOException {
+ // required size will be size total size of file - offset till file is
+ // already read
+ long requiredSize =
+ carbonDictionaryColumnMetaChunk.getEnd_offset() -
+ dictionaryInfo.getOffsetTillFileIsRead();
+
+ long numOfRecords = dictionaryInfo.getOffsetTillFileIsRead() == 0 ?
+ carbonDictionaryColumnMetaChunk.getMax_surrogate_key() :
+ carbonDictionaryColumnMetaChunk.getMax_surrogate_key()
+ - getNumRecordsInCarbonDictionaryColumnMetaChunk(
+ dictionaryColumnUniqueIdentifier,
+ dictionaryInfo.getOffsetTillFileIsRead());
+
+ if (numOfRecords > 0) {
+ long avgRecordsSize = requiredSize / numOfRecords;
+ long bytesPerRecord = (long)Math.ceil(avgRecordsSize / 8.0) * 8;
+
+ requiredSize = (bytesPerRecord + byteArraySize) * numOfRecords;
+ }
+
+ if (readSortIndexSize) {
+ // every time we are loading all the sort index files.Hence memory calculation for all
+ // the records
+ requiredSize = requiredSize + getSortIndexSize(
+ carbonDictionaryColumnMetaChunk.getMax_surrogate_key());
+ }
+
+ requiredSize = requiredSize + (sizeOfHashMapNode * numOfRecords);
+
+ return requiredSize + sizeOfEmptyDictChunks + sizeOfEmptyHashMap;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java b/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
index 5e8c8a3..8cfc602 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java
@@ -32,6 +32,7 @@ import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
import org.apache.carbondata.core.datastore.exception.IndexBuilderException;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
/**
* This class validate and load the B-Tree in the executor lru cache
@@ -92,13 +93,14 @@ public abstract class AbstractBlockIndexStoreCache<K, V>
TableBlockInfo blockInfo = tableBlockUniqueIdentifier.getTableBlockInfo();
long requiredMetaSize = CarbonUtil.calculateMetaSize(blockInfo);
if (requiredMetaSize > 0) {
- tableBlock.setMemorySize(requiredMetaSize);
// load table blocks data
// getting the data file meta data of the block
DataFileFooter footer = CarbonUtil.readMetadatFile(blockInfo);
footer.setBlockInfo(new BlockInfo(blockInfo));
// building the block
tableBlock.buildIndex(Collections.singletonList(footer));
+ requiredMetaSize = ObjectSizeCalculator.estimate(blockInfo, requiredMetaSize);
+ tableBlock.setMemorySize(requiredMetaSize);
tableBlock.incrementAccessCount();
boolean isTableBlockAddedToLruCache = lruCache.put(lruCacheKey, tableBlock, requiredMetaSize);
if (!isTableBlockAddedToLruCache) {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
index 734aaaf..980ea3e 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java
@@ -43,6 +43,7 @@ import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.mutate.UpdateVO;
import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;
import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.ObjectSizeCalculator;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.core.util.path.CarbonTablePath.DataFileUtil;
@@ -233,11 +234,10 @@ public class SegmentTaskIndexStore
taskIdToTableBlockInfoMap.entrySet().iterator();
long requiredSize =
calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier);
- segmentTaskIndexWrapper
- .setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize());
- boolean isAddedToLruCache =
- lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize);
- if (isAddedToLruCache) {
+ segmentTaskIndexWrapper.setMemorySize(requiredSize);
+ boolean canAddToLruCache =
+ lruCache.tryPut(lruCacheKey, requiredSize);
+ if (canAddToLruCache) {
while (iterator.hasNext()) {
Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList =
iterator.next();
@@ -246,6 +246,15 @@ public class SegmentTaskIndexStore
loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(),
absoluteTableIdentifier));
}
+ long updatedRequiredSize =
+ ObjectSizeCalculator.estimate(segmentTaskIndexWrapper, requiredSize);
+ // update the actual size of object
+ segmentTaskIndexWrapper.setMemorySize(updatedRequiredSize);
+ if (!lruCache.put(lruCacheKey, segmentTaskIndexWrapper, updatedRequiredSize)) {
+ throw new IndexBuilderException(
+ "Can not load the segment. No Enough space available.");
+ }
+
} else {
throw new IndexBuilderException(
"Can not load the segment. No Enough space available.");
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
index f2e09ec..cc14187 100644
--- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
+++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java
@@ -53,4 +53,17 @@ public interface CarbonDictionaryMetadataReader extends Closeable {
* @throws IOException if an I/O error occurs
*/
CarbonDictionaryColumnMetaChunk readLastEntryOfDictionaryMetaChunk() throws IOException;
+
+ /**
+ * This method will be used to read the last dictionary meta chunk ending at end_Offset.
+ * Applicable scenarios :
+ * 1. When loading into LRU cache, we need to calculate the size of the object in memory. For
+ * this we need the number of records already loaded into LRU cache, so that we can calculate
+ * the memory required for incremental load
+ *
+ * @return last segment entry for dictionary chunk
+ * @throws IOException if an I/O error occurs
+ */
+ CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset)
+ throws IOException;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
index 98c0ddb..9356974 100644
--- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java
@@ -133,6 +133,23 @@ public class CarbonDictionaryMetadataReaderImpl implements CarbonDictionaryMetad
return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
}
+ @Override public CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset)
+ throws IOException {
+ ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
+ // open dictionary meta thrift reader
+ openThriftReader();
+ // at the completion of while loop we will get the last dictionary chunk entry
+ while (dictionaryMetadataFileReader.hasNext()) {
+ // get the thrift object for dictionary chunk
+ dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
+ if (dictionaryChunkMeta.end_offset >= end_Offset) {
+ break;
+ }
+ }
+ // create a new instance of chunk meta wrapper using thrift object
+ return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
+ }
+
/**
* Closes this stream and releases any system resources associated
* with it. If the stream is already closed then invoking this
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java b/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java
new file mode 100644
index 0000000..513e786
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.util;
+
+import java.lang.reflect.Method;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+
+/**
+ * This wrapper class is created so that core doesn't have a direct dependency on spark
+ * TODO: Need to have carbon implementation if carbon needs to be used without spark
+ */
+public final class ObjectSizeCalculator {
+ /**
+ * Logger object for the class
+ */
+ private static final LogService LOGGER =
+ LogServiceFactory.getLogService(ObjectSizeCalculator.class.getName());
+
+ /**
+ * Class of spark to invoke
+ */
+ private static String className = "org.apache.spark.util.SizeEstimator";
+
+ private static Method estimateMethod = null;
+
+ private static boolean methodAccessible = true;
+
+ /**
+ * Invokes Spark's implementation of object size computation and
+ * returns the default value passed if the function cannot be invoked
+ * @param anObject
+ * @param defValue
+ * @return
+ */
+ public static long estimate(Object anObject, long defValue) {
+ try {
+ if (methodAccessible) {
+ if (null == estimateMethod) {
+ estimateMethod = Class.forName(className).getMethod("estimate", Object.class);
+ estimateMethod.setAccessible(true);
+ }
+ return (Long) estimateMethod.invoke(null, anObject);
+ } else {
+ return defValue;
+ }
+ } catch (Throwable ex) {
+ // throwable is being caught as external interface is being invoked through reflection
+ // and runtime exceptions might get thrown
+ LOGGER.error(ex, "Could not access method SizeEstimator:estimate.Returning default value");
+ methodAccessible = false;
+ return defValue;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java b/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
index 628c3ff..b06fc4d 100644
--- a/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java
@@ -206,7 +206,7 @@ public class ReverseDictionaryCacheTest extends AbstractDictionaryCacheTest {
// available size limit
new MockUp<CarbonDictionaryColumnMetaChunk>() {
@Mock public long getEnd_offset() {
- return 10485755L;
+ return 10445000L;
}
};
columnIdentifier = columnIdentifiers[1];
[2/2] carbondata git commit: [CARBONDATA-1179] Improve Size
calculation of Objects of LRU cache. This closes #1038.
Posted by gv...@apache.org.
[CARBONDATA-1179] Improve Size calculation of Objects of LRU cache. This closes #1038.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c8f742d4
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c8f742d4
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c8f742d4
Branch: refs/heads/master
Commit: c8f742d434e29a6a9000815d9e7921462120c77d
Parents: ac2168a 377dee9
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Thu Jun 29 11:41:35 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 11:41:35 2017 +0530
----------------------------------------------------------------------
.../carbondata/core/cache/CarbonLRUCache.java | 29 +++++++
.../AbstractColumnDictionaryInfo.java | 15 +++-
.../dictionary/AbstractDictionaryCache.java | 89 +++++++++++++++++---
.../core/cache/dictionary/DictionaryInfo.java | 12 +++
.../dictionary/ForwardDictionaryCache.java | 42 +++++++++
.../dictionary/ReverseDictionaryCache.java | 54 ++++++++++++
.../datastore/AbstractBlockIndexStoreCache.java | 4 +-
.../core/datastore/SegmentTaskIndexStore.java | 19 +++--
.../reader/CarbonDictionaryMetadataReader.java | 13 +++
.../CarbonDictionaryMetadataReaderImpl.java | 17 ++++
.../core/util/ObjectSizeCalculator.java | 71 ++++++++++++++++
.../dictionary/ReverseDictionaryCacheTest.java | 2 +-
12 files changed, 347 insertions(+), 20 deletions(-)
----------------------------------------------------------------------