You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2012/01/25 03:19:38 UTC
[1/2] git commit: Reduce memory used by primary index sample patch by
Radim Kolar and yukim; reviewed by jbellis for CASSANDRA-3743
Updated Branches:
refs/heads/trunk 37b079352 -> 86a7a3d1a
Reduce memory used by primary index sample
patch by Radim Kolar and yukim; reviewed by jbellis for CASSANDRA-3743
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/86a7a3d1
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/86a7a3d1
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/86a7a3d1
Branch: refs/heads/trunk
Commit: 86a7a3d1a0ba38e9c1f110814517f1b7630cfa51
Parents: 372a3ad
Author: Jonathan Ellis <jb...@apache.org>
Authored: Tue Jan 24 20:17:47 2012 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Tue Jan 24 20:19:27 2012 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/io/sstable/IndexSummary.java | 55 +++++----------
.../apache/cassandra/io/sstable/SSTableReader.java | 46 +++++-------
3 files changed, 37 insertions(+), 65 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/86a7a3d1/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 06ac8a0..07b6213 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
1.1-dev
+ * Reduce memory used by primary index sample (CASSANDRA-3743)
* (Hadoop) separate input/output configurations (CASSANDRA-3197, 3765)
* avoid returning internal Cassandra classes over JMX (CASSANDRA-2805)
* add row-level isolation via SnapTree (CASSANDRA-2893)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/86a7a3d1/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/IndexSummary.java b/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
index 68530cf..4c6efb5 100644
--- a/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
+++ b/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
@@ -1,6 +1,6 @@
package org.apache.cassandra.io.sstable;
/*
- *
+ *
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -8,16 +8,16 @@ package org.apache.cassandra.io.sstable;
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
- *
+ *
*/
@@ -35,7 +35,8 @@ import org.apache.cassandra.db.RowPosition;
*/
public class IndexSummary
{
- private ArrayList<KeyPosition> indexPositions;
+ private ArrayList<Long> positions;
+ private ArrayList<DecoratedKey> keys;
private long keysWritten = 0;
public IndexSummary(long expectedKeys)
@@ -44,7 +45,8 @@ public class IndexSummary
if (expectedEntries > Integer.MAX_VALUE)
// TODO: that's a _lot_ of keys, or a very low interval
throw new RuntimeException("Cannot use index_interval of " + DatabaseDescriptor.getIndexInterval() + " with " + expectedKeys + " (expected) keys.");
- indexPositions = new ArrayList<KeyPosition>((int)expectedEntries);
+ positions = new ArrayList<Long>((int)expectedEntries);
+ keys = new ArrayList<DecoratedKey>((int)expectedEntries);
}
public void incrementRowid()
@@ -59,7 +61,8 @@ public class IndexSummary
public void addEntry(DecoratedKey<?> key, long indexPosition)
{
- indexPositions.add(new KeyPosition(SSTable.getMinimalKey(key), indexPosition));
+ keys.add(SSTable.getMinimalKey(key));
+ positions.add(indexPosition);
}
public void maybeAddEntry(DecoratedKey<?> decoratedKey, long indexPosition)
@@ -69,43 +72,19 @@ public class IndexSummary
incrementRowid();
}
- public List<KeyPosition> getIndexPositions()
+ public List<DecoratedKey> getKeys()
{
- return indexPositions;
+ return keys;
}
- public void complete()
+ public long getPosition(int index)
{
- indexPositions.trimToSize();
+ return positions.get(index);
}
- /**
- * This is a simple container for the index Key and its corresponding position
- * in the index file. Binary search is performed on a list of these objects
- * to find where to start looking for the index entry containing the data position
- * (which will be turned into a PositionSize object)
- */
- public static final class KeyPosition implements Comparable<KeyPosition>
+ public void complete()
{
- // We allow RowPosition for the purpose of being able to select keys given a token, but the index
- // should only contain true user provided keys, i.e. DecoratedKey, which is enforced by addEntry.
- public final RowPosition key;
- public final long indexPosition;
-
- public KeyPosition(RowPosition key, long indexPosition)
- {
- this.key = key;
- this.indexPosition = indexPosition;
- }
-
- public int compareTo(KeyPosition kp)
- {
- return key.compareTo(kp.key);
- }
-
- public String toString()
- {
- return key + ":" + indexPosition;
- }
+ keys.trimToSize();
+ positions.trimToSize();
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/86a7a3d1/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
index 324c460..0162249 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -413,22 +413,22 @@ public class SSTableReader extends SSTable
}
/** get the position in the index file to start scanning to find the given key (at most indexInterval keys away) */
- private IndexSummary.KeyPosition getIndexScanPosition(RowPosition key)
+ private long getIndexScanPosition(RowPosition key)
{
- assert indexSummary.getIndexPositions() != null && indexSummary.getIndexPositions().size() > 0;
- int index = Collections.binarySearch(indexSummary.getIndexPositions(), new IndexSummary.KeyPosition(key, -1));
+ assert indexSummary.getKeys() != null && indexSummary.getKeys().size() > 0;
+ int index = Collections.binarySearch(indexSummary.getKeys(), key);
if (index < 0)
{
// binary search gives us the first index _greater_ than the key searched for,
// i.e., its insertion position
int greaterThan = (index + 1) * -1;
if (greaterThan == 0)
- return null;
- return indexSummary.getIndexPositions().get(greaterThan - 1);
+ return -1;
+ return indexSummary.getPosition(greaterThan - 1);
}
else
{
- return indexSummary.getIndexPositions().get(index);
+ return indexSummary.getPosition(index);
}
}
@@ -470,7 +470,7 @@ public class SSTableReader extends SSTable
*/
public long estimatedKeys()
{
- return indexSummary.getIndexPositions().size() * DatabaseDescriptor.getIndexInterval();
+ return indexSummary.getKeys().size() * DatabaseDescriptor.getIndexInterval();
}
/**
@@ -480,7 +480,7 @@ public class SSTableReader extends SSTable
public long estimatedKeysForRanges(Collection<Range<Token>> ranges)
{
long sampleKeyCount = 0;
- List<Pair<Integer, Integer>> sampleIndexes = getSampleIndexesForRanges(indexSummary.getIndexPositions(), ranges);
+ List<Pair<Integer, Integer>> sampleIndexes = getSampleIndexesForRanges(indexSummary.getKeys(), ranges);
for (Pair<Integer, Integer> sampleIndexRange : sampleIndexes)
sampleKeyCount += (sampleIndexRange.right - sampleIndexRange.left + 1);
return Math.max(1, sampleKeyCount * DatabaseDescriptor.getIndexInterval());
@@ -491,18 +491,10 @@ public class SSTableReader extends SSTable
*/
public Collection<DecoratedKey> getKeySamples()
{
- return Collections2.transform(indexSummary.getIndexPositions(),
- new Function<IndexSummary.KeyPosition, DecoratedKey>(){
- public DecoratedKey apply(IndexSummary.KeyPosition kp)
- {
- // the index should only contain valid row key, we only allow RowPosition in KeyPosition for search purposes
- assert kp.key instanceof DecoratedKey;
- return (DecoratedKey)kp.key;
- }
- });
+ return indexSummary.getKeys();
}
- private static List<Pair<Integer,Integer>> getSampleIndexesForRanges(List<IndexSummary.KeyPosition> samples, Collection<Range<Token>> ranges)
+ private static List<Pair<Integer,Integer>> getSampleIndexesForRanges(List<DecoratedKey> samples, Collection<Range<Token>> ranges)
{
// use the index to determine a minimal section for each range
List<Pair<Integer,Integer>> positions = new ArrayList<Pair<Integer,Integer>>();
@@ -514,7 +506,7 @@ public class SSTableReader extends SSTable
RowPosition leftPosition = range.left.maxKeyBound();
RowPosition rightPosition = range.left.maxKeyBound();
- int left = Collections.binarySearch(samples, new IndexSummary.KeyPosition(leftPosition, -1));
+ int left = Collections.binarySearch(samples, leftPosition);
if (left < 0)
left = (left + 1) * -1;
else
@@ -526,7 +518,7 @@ public class SSTableReader extends SSTable
int right = Range.isWrapAround(range.left, range.right)
? samples.size() - 1
- : Collections.binarySearch(samples, new IndexSummary.KeyPosition(rightPosition, -1));
+ : Collections.binarySearch(samples, rightPosition);
if (right < 0)
{
// range are end inclusive so we use the previous index from what binarySearch give us
@@ -548,7 +540,7 @@ public class SSTableReader extends SSTable
public Iterable<DecoratedKey> getKeySamples(final Range<Token> range)
{
- final List<IndexSummary.KeyPosition> samples = indexSummary.getIndexPositions();
+ final List<DecoratedKey> samples = indexSummary.getKeys();
final List<Pair<Integer, Integer>> indexRanges = getSampleIndexesForRanges(samples, Collections.singletonList(range));
@@ -583,7 +575,7 @@ public class SSTableReader extends SSTable
public DecoratedKey next()
{
- RowPosition k = samples.get(idx++).key;
+ RowPosition k = samples.get(idx++);
// the index should only contain valid row key, we only allow RowPosition in KeyPosition for search purposes
assert k instanceof DecoratedKey;
return (DecoratedKey)k;
@@ -674,8 +666,8 @@ public class SSTableReader extends SSTable
}
// next, see if the sampled index says it's impossible for the key to be present
- IndexSummary.KeyPosition sampledPosition = getIndexScanPosition(key);
- if (sampledPosition == null)
+ long sampledPosition = getIndexScanPosition(key);
+ if (sampledPosition == -1)
{
if (op == Operator.EQ)
bloomFilterTracker.addFalsePositive();
@@ -684,7 +676,7 @@ public class SSTableReader extends SSTable
}
// scan the on-disk index, starting at the nearest sampled position
- Iterator<FileDataInput> segments = ifile.iterator(sampledPosition.indexPosition, INDEX_FILE_BUFFER_BYTES);
+ Iterator<FileDataInput> segments = ifile.iterator(sampledPosition, INDEX_FILE_BUFFER_BYTES);
while (segments.hasNext())
{
FileDataInput input = segments.next();