You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/12/19 12:22:13 UTC
svn commit: r1646674 - in /lucene/dev/trunk/lucene/core/src:
java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java
java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java
test/org/apache/lucene/util/TestFrequencyTrackingRingBuffer.java
Author: jpountz
Date: Fri Dec 19 11:22:13 2014
New Revision: 1646674
URL: http://svn.apache.org/r1646674
Log:
LUCENE-6118: Improve the efficiency of the history structure for recently-used filters.
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFrequencyTrackingRingBuffer.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java?rev=1646674&r1=1646673&r2=1646674&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java Fri Dec 19 11:22:13 2014
@@ -36,6 +36,9 @@ import org.apache.lucene.util.FrequencyT
*/
public final class UsageTrackingFilterCachingPolicy implements FilterCachingPolicy {
+ // the hash code that we use as a sentinel in the ring buffer.
+ private static final int SENTINEL = Integer.MIN_VALUE;
+
static boolean isCostly(Filter filter) {
// This does not measure the cost of iterating over the filter (for this we
// already have the DocIdSetIterator#cost API) but the cost to build the
@@ -51,7 +54,7 @@ public final class UsageTrackingFilterCa
}
private final FilterCachingPolicy.CacheOnLargeSegments segmentPolicy;
- private final FrequencyTrackingRingBuffer<Integer> recentlyUsedFilters;
+ private final FrequencyTrackingRingBuffer recentlyUsedFilters;
private final int minFrequencyCostlyFilters;
private final int minFrequencyCheapFilters;
private final int minFrequencyOtherFilters;
@@ -96,7 +99,7 @@ public final class UsageTrackingFilterCa
if (minFrequencyCheapFilters > historySize || minFrequencyCostlyFilters > historySize || minFrequencyOtherFilters > historySize) {
throw new IllegalArgumentException("The minimum frequencies should be less than the size of the history of filters that are being tracked");
}
- this.recentlyUsedFilters = new FrequencyTrackingRingBuffer<>(historySize);
+ this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
this.minFrequencyCostlyFilters = minFrequencyCostlyFilters;
this.minFrequencyCheapFilters = minFrequencyCheapFilters;
this.minFrequencyOtherFilters = minFrequencyOtherFilters;
@@ -104,9 +107,10 @@ public final class UsageTrackingFilterCa
@Override
public void onUse(Filter filter) {
- // Using the filter hash codes might help keep memory usage a bit lower
- // since some filters might have non-negligible memory usage?
- recentlyUsedFilters.add(filter.hashCode());
+ // we only track hash codes, which
+ synchronized (this) {
+ recentlyUsedFilters.add(filter.hashCode());
+ }
}
@Override
@@ -114,7 +118,10 @@ public final class UsageTrackingFilterCa
if (segmentPolicy.shouldCache(filter, context, set) == false) {
return false;
}
- final int frequency = recentlyUsedFilters.frequency(filter.hashCode());
+ final int frequency;
+ synchronized (this) {
+ frequency = recentlyUsedFilters.frequency(filter.hashCode());
+ }
if (frequency >= minFrequencyOtherFilters) {
return true;
} else if (isCostly(filter) && frequency >= minFrequencyCostlyFilters) {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java?rev=1646674&r1=1646673&r2=1646674&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java Fri Dec 19 11:22:13 2014
@@ -17,75 +17,202 @@ package org.apache.lucene.util;
* limitations under the License.
*/
-import java.util.ArrayDeque;
-import java.util.Collections;
-import java.util.Deque;
+import java.util.Arrays;
+import java.util.HashMap;
import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
/**
- * A ring buffer that tracks the frequency of the items that it contains.
- * This is typically useful to track popular recently-used items.
+ * A ring buffer that tracks the frequency of the integers that it contains.
+ * This is typically useful to track the hash codes of popular recently-used
+ * items.
*
- * This class is thread-safe.
+ * This data-structure requires 22 bytes per entry on average (between 16 and
+ * 28).
*
* @lucene.internal
*/
-public final class FrequencyTrackingRingBuffer<T> {
+public final class FrequencyTrackingRingBuffer implements Accountable {
- private final int maxSize;
- private final Deque<T> ringBuffer;
- private final ConcurrentMap<T, Integer> frequencies;
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FrequencyTrackingRingBuffer.class);
- /** Create a new ring buffer that will contain at most <code>size</code> items. */
- public FrequencyTrackingRingBuffer(int maxSize) {
+ private final int maxSize;
+ private final int[] buffer;
+ private int position;
+ private final IntBag frequencies;
+
+ /** Create a new ring buffer that will contain at most <code>maxSize</code> items.
+ * This buffer will initially contain <code>maxSize</code> times the
+ * <code>sentinel</code> value. */
+ public FrequencyTrackingRingBuffer(int maxSize, int sentinel) {
+ if (maxSize < 2) {
+ throw new IllegalArgumentException("maxSize must be at least 2");
+ }
this.maxSize = maxSize;
- this.ringBuffer = new ArrayDeque<>(maxSize);
- this.frequencies = new ConcurrentHashMap<>();
+ buffer = new int[maxSize];
+ position = 0;
+ frequencies = new IntBag(maxSize);
+
+ Arrays.fill(buffer, sentinel);
+ for (int i = 0; i < maxSize; ++i) {
+ frequencies.add(sentinel);
+ }
+ assert frequencies.frequency(sentinel) == maxSize;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED
+ + frequencies.ramBytesUsed()
+ + RamUsageEstimator.sizeOf(buffer);
}
/**
* Add a new item to this ring buffer, potentially removing the oldest
* entry from this buffer if it is already full.
*/
- public synchronized void add(T item) {
- // we need this method to be protected by a lock since it is important for
- // correctness that the ring buffer and the frequencies table have
- // consistent content
- if (item == null) {
- throw new IllegalArgumentException("null items are not supported");
- }
- assert ringBuffer.size() <= maxSize;
- if (ringBuffer.size() == maxSize) {
- // evict the oldest entry
- final T removed = ringBuffer.removeFirst();
- final int newFrequency = frequency(removed) - 1;
- if (newFrequency == 0) {
- // free for GC
- frequencies.remove(removed);
- } else {
- frequencies.put(removed, newFrequency);
- }
+ public void add(int i) {
+ // remove the previous value
+ final int removed = buffer[position];
+ final boolean removedFromBag = frequencies.remove(removed);
+ assert removedFromBag;
+ // add the new value
+ buffer[position] = i;
+ frequencies.add(i);
+ // increment the position
+ position += 1;
+ if (position == maxSize) {
+ position = 0;
}
-
- // add the new entry and update frequencies
- ringBuffer.addLast(item);
- frequencies.put(item, frequency(item) + 1);
}
/**
- * Returns the frequency of the provided item in the ring buffer.
+ * Returns the frequency of the provided key in the ring buffer.
*/
- public int frequency(T item) {
- // The use of a concurrent hash map allows us to not use a lock for this read-only method
- final Integer freq = frequencies.get(item);
- return freq == null ? 0 : freq;
+ public int frequency(int key) {
+ return frequencies.frequency(key);
}
// pkg-private for testing
- Map<T, Integer> asFrequencyMap() {
- return Collections.unmodifiableMap(frequencies);
+ Map<Integer, Integer> asFrequencyMap() {
+ return frequencies.asMap();
+ }
+
+ /**
+ * A bag of integers.
+ * Since in the context of the ring buffer the maximum size is known up-front
+ * there is no need to worry about resizing the underlying storage.
+ */
+ private static class IntBag implements Accountable {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntBag.class);
+
+ private final int[] keys;
+ private final int[] freqs;
+ private final int mask;
+
+ IntBag(int maxSize) {
+ // load factor of 2/3
+ int capacity = Math.max(2, maxSize * 3 / 2);
+ // round up to the next power of two
+ capacity = Integer.highestOneBit(capacity - 1) << 1;
+ assert capacity > maxSize;
+ keys = new int[capacity];
+ freqs = new int[capacity];
+ mask = capacity - 1;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED
+ + RamUsageEstimator.sizeOf(keys)
+ + RamUsageEstimator.sizeOf(freqs);
+ }
+
+ /** Return the frequency of the give key in the bag. */
+ int frequency(int key) {
+ for (int slot = key & mask; ; slot = (slot + 1) & mask) {
+ if (keys[slot] == key) {
+ return freqs[slot];
+ } else if (freqs[slot] == 0) {
+ return 0;
+ }
+ }
+ }
+
+ /** Increment the frequency of the given key by 1 and return its new frequency. */
+ int add(int key) {
+ for (int slot = key & mask; ; slot = (slot + 1) & mask) {
+ if (freqs[slot] == 0) {
+ keys[slot] = key;
+ return freqs[slot] = 1;
+ } else if (keys[slot] == key) {
+ return ++freqs[slot];
+ }
+ }
+ }
+
+ /** Decrement the frequency of the given key by one, or do nothing if the
+ * key is not present in the bag. Returns true iff the key was contained
+ * in the bag. */
+ boolean remove(int key) {
+ for (int slot = key & mask; ; slot = (slot + 1) & mask) {
+ if (freqs[slot] == 0) {
+ // no such key in the bag
+ return false;
+ } else if (keys[slot] == key) {
+ final int newFreq = --freqs[slot];
+ if (newFreq == 0) { // removed
+ relocateAdjacentKeys(slot);
+ }
+ return true;
+ }
+ }
+ }
+
+ private void relocateAdjacentKeys(int freeSlot) {
+ for (int slot = (freeSlot + 1) & mask; ; slot = (slot + 1) & mask) {
+ final int freq = freqs[slot];
+ if (freq == 0) {
+ // end of the collision chain, we're done
+ break;
+ }
+ final int key = keys[slot];
+ // the slot where <code>key</code> should be if there were no collisions
+ final int expectedSlot = key & mask;
+ // if the free slot is between the expected slot and the slot where the
+ // key is, then we can relocate there
+ if (between(expectedSlot, slot, freeSlot)) {
+ keys[freeSlot] = key;
+ freqs[freeSlot] = freq;
+ // slot is the new free slot
+ freqs[slot] = 0;
+ freeSlot = slot;
+ }
+ }
+ }
+
+ /** Given a chain of occupied slots between <code>chainStart</code>
+ * and <code>chainEnd</code>, return whether <code>slot</code> is
+ * between the start and end of the chain. */
+ private static boolean between(int chainStart, int chainEnd, int slot) {
+ if (chainStart <= chainEnd) {
+ return chainStart <= slot && slot <= chainEnd;
+ } else {
+ // the chain is across the end of the array
+ return slot >= chainStart || slot <= chainEnd;
+ }
+ }
+
+ Map<Integer, Integer> asMap() {
+ Map<Integer, Integer> map = new HashMap<>();
+ for (int i = 0; i < keys.length; ++i) {
+ if (freqs[i] > 0) {
+ map.put(keys[i], freqs[i]);
+ }
+ }
+ return map;
+ }
+
}
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFrequencyTrackingRingBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFrequencyTrackingRingBuffer.java?rev=1646674&r1=1646673&r2=1646674&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFrequencyTrackingRingBuffer.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFrequencyTrackingRingBuffer.java Fri Dec 19 11:22:13 2014
@@ -24,15 +24,19 @@ import java.util.Map;
public class TestFrequencyTrackingRingBuffer extends LuceneTestCase {
- private static <T> void assertBuffer(FrequencyTrackingRingBuffer<T> buffer, int maxSize, List<T> items) {
- final List<T> recentItems;
+ private static void assertBuffer(FrequencyTrackingRingBuffer buffer, int maxSize, int sentinel, List<Integer> items) {
+ final List<Integer> recentItems;
if (items.size() <= maxSize) {
- recentItems = items;
+ recentItems = new ArrayList<>();
+ for (int i = items.size(); i < maxSize; ++i) {
+ recentItems.add(sentinel);
+ }
+ recentItems.addAll(items);
} else {
recentItems = items.subList(items.size() - maxSize, items.size());
}
- final Map<T, Integer> expectedFrequencies = new HashMap<T, Integer>();
- for (T item : recentItems) {
+ final Map<Integer, Integer> expectedFrequencies = new HashMap<Integer, Integer>();
+ for (Integer item : recentItems) {
final Integer freq = expectedFrequencies.get(item);
if (freq == null) {
expectedFrequencies.put(item, 1);
@@ -46,18 +50,29 @@ public class TestFrequencyTrackingRingBu
public void test() {
final int iterations = atLeast(100);
for (int i = 0; i < iterations; ++i) {
- final int maxSize = 1 + random().nextInt(100);
- final int numitems = random().nextInt(500);
+ final int maxSize = 2 + random().nextInt(100);
+ final int numitems = random().nextInt(5000);
final int maxitem = 1 + random().nextInt(100);
List<Integer> items = new ArrayList<>();
- FrequencyTrackingRingBuffer<Integer> buffer = new FrequencyTrackingRingBuffer<>(maxSize);
+ final int sentinel = random().nextInt(200);
+ FrequencyTrackingRingBuffer buffer = new FrequencyTrackingRingBuffer(maxSize, sentinel);
for (int j = 0; j < numitems; ++j) {
final Integer item = random().nextInt(maxitem);
items.add(item);
buffer.add(item);
}
- assertBuffer(buffer, maxSize, items);
+ assertBuffer(buffer, maxSize, sentinel, items);
+ }
+ }
+
+ public void testRamBytesUsed() {
+ final int maxSize = 2 + random().nextInt(10000);
+ final int sentinel = random().nextInt();
+ FrequencyTrackingRingBuffer buffer = new FrequencyTrackingRingBuffer(maxSize, sentinel);
+ for (int i = 0; i < 10000; ++i) {
+ buffer.add(random().nextInt());
}
+ assertEquals(RamUsageTester.sizeOf(buffer), buffer.ramBytesUsed());
}
}