You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2015/04/15 16:41:29 UTC
svn commit: r1673789 - in /jackrabbit/oak/branches/1.0/oak-core/src:
main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java
test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java
Author: thomasm
Date: Wed Apr 15 14:41:29 2015
New Revision: 1673789
URL: http://svn.apache.org/r1673789
Log:
OAK-2752 SegmentIdTable can sometimes hang when calling getSegmentId(msb, lsb)
Added:
jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java
Modified: jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java?rev=1673789&r1=1673788&r2=1673789&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java Wed Apr 15 14:41:29 2015
@@ -23,9 +23,12 @@ import static java.util.Collections.nCop
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
import java.util.Map;
import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Hash table of weak references to segment identifiers.
@@ -33,28 +36,53 @@ import org.apache.jackrabbit.oak.plugins
public class SegmentIdTable {
/**
- * Hash table of weak references to segment identifiers that are
- * currently being accessed. The size of the table is always a power
- * of two, which optimizes the {@link #expand()} operation. The table is
- * indexed by the random identifier bits, which guarantees uniform
- * distribution of entries. Each table entry is either {@code null}
- * (when there are no matching identifiers) or a list of weak references
- * to the matching identifiers.
+ * The list of weak references to segment identifiers that are currently
+ * being accessed. This represents a hash table that uses open addressing
+ * with linear probing. It is not a hash map, to speed up read access.
+ * <p>
+ * The size of the table is always a power of two, so that we can use
+ * bitwise "and" instead of modulo.
+ * <p>
+ * The table is indexed by the random identifier bits, which guarantees
+ * uniform distribution of entries.
+ * <p>
+ * Open addressing with linear probing is used. Each table entry is either
+ * null (when there are no matching identifiers), a weak references to the
+ * matching identifier, or a weak reference to another identifier.
+ * There are no tombstone entries as there is no explicit remove operation,
+ * but a referent can become null if the entry is garbage collected.
+ * <p>
+ * The array is not sorted (we could; lookup might be faster, but adding
+ * entries would be slower).
*/
private final ArrayList<WeakReference<SegmentId>> references =
newArrayList(nCopies(1024, (WeakReference<SegmentId>) null));
private final SegmentTracker tracker;
+
+ private static final Logger LOG = LoggerFactory.getLogger(SegmentIdTable.class);
+
+
+ /**
+ * The refresh count (for diagnostics and testing).
+ */
+ private int rebuildCount;
+
+ /**
+ * The number of used entries (WeakReferences) in this table.
+ */
+ private int entryCount;
SegmentIdTable(SegmentTracker tracker) {
this.tracker = tracker;
}
/**
+ * Get the segment id, and reference it in the weak references map.
*
* @param msb
* @param lsb
- * @return
+ * @return the segment id
*/
synchronized SegmentId getSegmentId(long msb, long lsb) {
int first = getIndex(lsb);
@@ -69,13 +97,20 @@ public class SegmentIdTable {
&& id.getLeastSignificantBits() == lsb) {
return id;
}
+ // shouldRefresh if we have a garbage collected entry
shouldRefresh = shouldRefresh || id == null;
+ // open addressing / linear probing
index = (index + 1) % references.size();
reference = references.get(index);
}
SegmentId id = new SegmentId(tracker, msb, lsb);
references.set(index, new WeakReference<SegmentId>(id));
+ entryCount++;
+ if (entryCount > references.size() * 0.75) {
+ // more than 75% full
+ shouldRefresh = true;
+ }
if (shouldRefresh) {
refresh();
}
@@ -85,7 +120,7 @@ public class SegmentIdTable {
/**
* Returns all segment identifiers that are currently referenced in memory.
*
- * @return referenced segment identifiers
+ * @param ids referenced segment identifiers
*/
void collectReferencedIds(Collection<SegmentId> ids) {
ids.addAll(refresh());
@@ -107,16 +142,33 @@ public class SegmentIdTable {
hashCollisions = hashCollisions || (i != getIndex(id));
} else {
references.set(i, null);
+ entryCount--;
emptyReferences = true;
}
}
}
+
+ if (entryCount != ids.size()) {
+ // something is wrong, possibly a concurrency problem, a SegmentId
+ // hashcode or equals bug, or a problem with this hash table
+ // algorithm
+ LOG.warn("Unexpected entry count mismatch, expected " +
+ entryCount + " got " + ids.size());
+ // we fix the count, because having a wrong entry count would be
+ // very problematic; even worse than having a concurrency problem
+ entryCount = ids.size();
+ }
while (2 * ids.size() > size) {
size *= 2;
}
+ // we need to re-build the table if the new size is different,
+ // but also if we removed some of the entries (because an entry was
+ // garbage collected) and there is at least one entry at the "wrong"
+ // location (due to open addressing)
if ((hashCollisions && emptyReferences) || size != references.size()) {
+ rebuildCount++;
references.clear();
references.addAll(nCopies(size, (WeakReference<SegmentId>) null));
@@ -150,8 +202,11 @@ public class SegmentIdTable {
SegmentId id = reference.get();
if (id != null) {
if (strategy.canRemove(id)) {
+ // we clear the reference here, but we must not
+ // remove the reference from the list, because
+ // that could cause duplicate references
+ // (there is a unit test for this case)
reference.clear();
- references.set(i, null);
dirty = true;
}
}
@@ -161,4 +216,50 @@ public class SegmentIdTable {
refresh();
}
}
+
+ /**
+ * Get the number of map rebuild operations (used for testing and diagnostics).
+ *
+ * @return the rebuild count
+ */
+ int getMapRebuildCount() {
+ return rebuildCount;
+ }
+
+ /**
+ * Get the entry count (used for testing and diagnostics).
+ *
+ * @return the entry count
+ */
+ int getEntryCount() {
+ return entryCount;
+ }
+
+ /**
+ * Get the size of the internal map (used for testing and diagnostics).
+ *
+ * @return the map size
+ */
+ int getMapSize() {
+ return references.size();
+ }
+
+ /**
+ * Get the raw list of segment ids (used for testing).
+ *
+ * @return the raw list
+ */
+ List<SegmentId> getRawSegmentIdList() {
+ ArrayList<SegmentId> list = new ArrayList<SegmentId>();
+ for (WeakReference<SegmentId> ref : references) {
+ if (ref != null) {
+ SegmentId id = ref.get();
+ if (id != null) {
+ list.add(id);
+ }
+ }
+ }
+ return list;
+ }
+
}
Added: jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java?rev=1673789&view=auto
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java (added)
+++ jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java Wed Apr 15 14:41:29 2015
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import javax.annotation.Nonnull;
+
+import junit.framework.Assert;
+
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy.CleanupType;
+import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore;
+import org.junit.Test;
+
+public class SegmentIdTableTest {
+
+ /**
+ * OAK-2752
+ */
+ @Test
+ public void endlessSearchLoop() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ for (int i = 0; i < 1024; i++) {
+ refs.add(tbl.getSegmentId(i, i % 64));
+ }
+
+ Callable<SegmentId> c = new Callable<SegmentId>() {
+
+ @Override
+ public SegmentId call() throws Exception {
+ // (2,1) doesn't exist
+ return tbl.getSegmentId(2, 1);
+ }
+ };
+ Future<SegmentId> f = Executors.newSingleThreadExecutor().submit(c);
+ SegmentId s = null;
+ try {
+ s = f.get(5, TimeUnit.SECONDS);
+ } catch (Exception e) {
+ Assert.fail(e.getMessage());
+ }
+ Assert.assertNotNull(s);
+ Assert.assertEquals(2, s.getMostSignificantBits());
+ Assert.assertEquals(1, s.getLeastSignificantBits());
+ }
+
+ @Test
+ public void randomized() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ Random r = new Random(1);
+ for (int i = 0; i < 16 * 1024; i++) {
+ refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+ }
+ Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+ Assert.assertEquals(16 * 2048, tbl.getMapSize());
+ Assert.assertEquals(5, tbl.getMapRebuildCount());
+
+ r = new Random(1);
+ for (int i = 0; i < 16 * 1024; i++) {
+ refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+ Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+ Assert.assertEquals(16 * 2048, tbl.getMapSize());
+ Assert.assertEquals(5, tbl.getMapRebuildCount());
+ }
+ }
+
+ @Test
+ public void clearTable() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ int originalCount = 8;
+ for (int i = 0; i < originalCount; i++) {
+ refs.add(tbl.getSegmentId(i, i % 2));
+ }
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ Assert.assertEquals(0, tbl.getMapRebuildCount());
+
+ tbl.clearSegmentIdTables(new CompactionStrategy(false, false,
+ CleanupType.CLEAN_NONE, originalCount, (byte) 0) {
+
+ @Override
+ public boolean compacted(@Nonnull Callable<Boolean> setHead)
+ throws Exception {
+ return true;
+ }
+
+ @Override
+ public boolean canRemove(SegmentId id) {
+ return id.getMostSignificantBits() < 4;
+ }
+
+ });
+
+ Assert.assertEquals(4, tbl.getEntryCount());
+
+ for (SegmentId id : refs) {
+ if (id.getMostSignificantBits() >= 4) {
+ SegmentId id2 = tbl.getSegmentId(
+ id.getMostSignificantBits(),
+ id.getLeastSignificantBits());
+ List<SegmentId> list = tbl.getRawSegmentIdList();
+ if (list.size() != new HashSet<SegmentId>(list).size()) {
+ Collections.sort(list);
+ fail("duplicate entry " + list.toString());
+ }
+ Assert.assertTrue(id == id2);
+ }
+ }
+ }
+
+ @Test
+ public void justHashCollisions() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ int originalCount = 1024;
+ for (int i = 0; i < originalCount; i++) {
+ // modulo 128 to ensure we have conflicts
+ refs.add(tbl.getSegmentId(i, i % 128));
+ }
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+ List<SegmentId> refs2 = new ArrayList<SegmentId>();
+ tbl.collectReferencedIds(refs2);
+ Assert.assertEquals(refs.size(), refs2.size());
+
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ // we don't expect that there was a refresh,
+ // because there were just hash collisions
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+ }
+
+ @Test
+ public void gc() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ int originalCount = 1024;
+ for (int i = 0; i < originalCount; i++) {
+ // modulo 128 to ensure we have conflicts
+ refs.add(tbl.getSegmentId(i, i % 128));
+ }
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+ for (int i = 0; i < refs.size() / 2; i++) {
+ // we need to remove the first entries,
+ // because if we remove the last entries, then
+ // getSegmentId would not detect that entries were freed up
+ refs.remove(0);
+ }
+ for (int gcCalls = 0;; gcCalls++) {
+ // needed here, so some entries can be garbage collected
+ System.gc();
+
+ for (SegmentId id : refs) {
+ SegmentId id2 = tbl.getSegmentId(id.getMostSignificantBits(), id.getLeastSignificantBits());
+ Assert.assertTrue(id2 == id);
+ }
+ // because we found each entry, we expect the refresh count is the same
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+ // even thought this does not increase the entry count a lot,
+ // it is supposed to detect that entries were removed,
+ // and force a refresh, which would get rid of the unreferenced ids
+ for (int i = 0; i < 10; i++) {
+ tbl.getSegmentId(i, i);
+ }
+
+ if (tbl.getEntryCount() < originalCount) {
+ break;
+ } else if (gcCalls > 10) {
+ fail("No entries were garbage collected after 10 times System.gc()");
+ }
+ }
+ Assert.assertEquals(2, tbl.getMapRebuildCount());
+ }
+}
\ No newline at end of file