You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2015/04/15 16:41:29 UTC

svn commit: r1673789 - in /jackrabbit/oak/branches/1.0/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java

Author: thomasm
Date: Wed Apr 15 14:41:29 2015
New Revision: 1673789

URL: http://svn.apache.org/r1673789
Log:
OAK-2752 SegmentIdTable can sometimes hang when calling getSegmentId(msb, lsb)

Added:
    jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java
Modified:
    jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java

Modified: jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java?rev=1673789&r1=1673788&r2=1673789&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java Wed Apr 15 14:41:29 2015
@@ -23,9 +23,12 @@ import static java.util.Collections.nCop
 import java.lang.ref.WeakReference;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Hash table of weak references to segment identifiers.
@@ -33,28 +36,53 @@ import org.apache.jackrabbit.oak.plugins
 public class SegmentIdTable {
 
     /**
-     * Hash table of weak references to segment identifiers that are
-     * currently being accessed. The size of the table is always a power
-     * of two, which optimizes the {@link #expand()} operation. The table is
-     * indexed by the random identifier bits, which guarantees uniform
-     * distribution of entries. Each table entry is either {@code null}
-     * (when there are no matching identifiers) or a list of weak references
-     * to the matching identifiers.
+     * The list of weak references to segment identifiers that are currently
+     * being accessed. This represents a hash table that uses open addressing
+     * with linear probing. It is not a hash map, to speed up read access.
+     * <p>
+     * The size of the table is always a power of two, so that we can use
+     * bitwise "and" instead of modulo.
+     * <p>
+     * The table is indexed by the random identifier bits, which guarantees
+     * uniform distribution of entries.
+     * <p>
+     * Open addressing with linear probing is used. Each table entry is either
+     * null (when there are no matching identifiers), a weak references to the
+     * matching identifier, or a weak reference to another identifier.
+     * There are no tombstone entries as there is no explicit remove operation,
+     * but a referent can become null if the entry is garbage collected.
+     * <p>
+     * The array is not sorted (we could; lookup might be faster, but adding
+     * entries would be slower).
      */
     private final ArrayList<WeakReference<SegmentId>> references =
             newArrayList(nCopies(1024, (WeakReference<SegmentId>) null));
 
     private final SegmentTracker tracker;
+    
+    private static final Logger LOG = LoggerFactory.getLogger(SegmentIdTable.class);
+
+    
+    /**
+     * The refresh count (for diagnostics and testing).
+     */
+    private int rebuildCount;
+    
+    /**
+     * The number of used entries (WeakReferences) in this table.
+     */
+    private int entryCount;
 
     SegmentIdTable(SegmentTracker tracker) {
         this.tracker = tracker;
     }
 
     /**
+     * Get the segment id, and reference it in the weak references map.
      * 
      * @param msb
      * @param lsb
-     * @return
+     * @return the segment id
      */
     synchronized SegmentId getSegmentId(long msb, long lsb) {
         int first = getIndex(lsb);
@@ -69,13 +97,20 @@ public class SegmentIdTable {
                     && id.getLeastSignificantBits() == lsb) {
                 return id;
             }
+            // shouldRefresh if we have a garbage collected entry
             shouldRefresh = shouldRefresh || id == null;
+            // open addressing / linear probing
             index = (index + 1) % references.size();
             reference = references.get(index);
         }
 
         SegmentId id = new SegmentId(tracker, msb, lsb);
         references.set(index, new WeakReference<SegmentId>(id));
+        entryCount++;
+        if (entryCount > references.size() * 0.75) {
+            // more than 75% full            
+            shouldRefresh = true;
+        }
         if (shouldRefresh) {
             refresh();
         }
@@ -85,7 +120,7 @@ public class SegmentIdTable {
     /**
      * Returns all segment identifiers that are currently referenced in memory.
      *
-     * @return referenced segment identifiers
+     * @param ids referenced segment identifiers
      */
     void collectReferencedIds(Collection<SegmentId> ids) {
         ids.addAll(refresh());
@@ -107,16 +142,33 @@ public class SegmentIdTable {
                     hashCollisions = hashCollisions || (i != getIndex(id));
                 } else {
                     references.set(i, null);
+                    entryCount--;
                     emptyReferences = true;
                 }
             }
         }
+        
+        if (entryCount != ids.size()) {
+            // something is wrong, possibly a concurrency problem, a SegmentId
+            // hashcode or equals bug, or a problem with this hash table
+            // algorithm
+            LOG.warn("Unexpected entry count mismatch, expected " + 
+                    entryCount + " got " + ids.size());
+            // we fix the count, because having a wrong entry count would be
+            // very problematic; even worse than having a concurrency problem
+            entryCount = ids.size();
+        }
 
         while (2 * ids.size() > size) {
             size *= 2;
         }
 
+        // we need to re-build the table if the new size is different,
+        // but also if we removed some of the entries (because an entry was
+        // garbage collected) and there is at least one entry at the "wrong"
+        // location (due to open addressing)
         if ((hashCollisions && emptyReferences) || size != references.size()) {
+            rebuildCount++;
             references.clear();
             references.addAll(nCopies(size, (WeakReference<SegmentId>) null));
 
@@ -150,8 +202,11 @@ public class SegmentIdTable {
                 SegmentId id = reference.get();
                 if (id != null) {
                     if (strategy.canRemove(id)) {
+                        // we clear the reference here, but we must not
+                        // remove the reference from the list, because
+                        // that could cause duplicate references
+                        // (there is a unit test for this case)
                         reference.clear();
-                        references.set(i, null);
                         dirty = true;
                     }
                 }
@@ -161,4 +216,50 @@ public class SegmentIdTable {
             refresh();
         }
     }
+    
+    /**
+     * Get the number of map rebuild operations (used for testing and diagnostics).
+     * 
+     * @return the rebuild count
+     */
+    int getMapRebuildCount() {
+        return rebuildCount;
+    }
+    
+    /**
+     * Get the entry count (used for testing and diagnostics).
+     * 
+     * @return the entry count
+     */
+    int getEntryCount() {
+        return entryCount;
+    }
+    
+    /**
+     * Get the size of the internal map (used for testing and diagnostics).
+     * 
+     * @return the map size
+     */
+    int getMapSize() {
+        return references.size();
+    }
+    
+    /**
+     * Get the raw list of segment ids (used for testing).
+     * 
+     * @return the raw list
+     */
+    List<SegmentId> getRawSegmentIdList() {
+        ArrayList<SegmentId> list = new ArrayList<SegmentId>();
+        for (WeakReference<SegmentId> ref : references) {
+            if (ref != null) {
+                SegmentId id = ref.get();
+                if (id != null) {
+                    list.add(id);
+                }
+            }
+        }
+        return list;
+    }
+
 }

Added: jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java?rev=1673789&view=auto
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java (added)
+++ jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java Wed Apr 15 14:41:29 2015
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import javax.annotation.Nonnull;
+
+import junit.framework.Assert;
+
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy.CleanupType;
+import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore;
+import org.junit.Test;
+
+public class SegmentIdTableTest {
+
+    /**
+     * OAK-2752
+     */
+    @Test
+    public void endlessSearchLoop() {
+        SegmentTracker tracker = new MemoryStore().getTracker();
+        final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+        List<SegmentId> refs = new ArrayList<SegmentId>();
+        for (int i = 0; i < 1024; i++) {
+            refs.add(tbl.getSegmentId(i, i % 64));
+        }
+
+        Callable<SegmentId> c = new Callable<SegmentId>() {
+
+            @Override
+            public SegmentId call() throws Exception {
+                // (2,1) doesn't exist
+                return tbl.getSegmentId(2, 1);
+            }
+        };
+        Future<SegmentId> f = Executors.newSingleThreadExecutor().submit(c);
+        SegmentId s = null;
+        try {
+            s = f.get(5, TimeUnit.SECONDS);
+        } catch (Exception e) {
+            Assert.fail(e.getMessage());
+        }
+        Assert.assertNotNull(s);
+        Assert.assertEquals(2, s.getMostSignificantBits());
+        Assert.assertEquals(1, s.getLeastSignificantBits());
+    }
+    
+    @Test
+    public void randomized() {
+        SegmentTracker tracker = new MemoryStore().getTracker();
+        final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+        List<SegmentId> refs = new ArrayList<SegmentId>();
+        Random r = new Random(1);
+        for (int i = 0; i < 16 * 1024; i++) {
+            refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+        }
+        Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+        Assert.assertEquals(16 * 2048, tbl.getMapSize());
+        Assert.assertEquals(5, tbl.getMapRebuildCount());
+        
+        r = new Random(1);
+        for (int i = 0; i < 16 * 1024; i++) {
+            refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+            Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+            Assert.assertEquals(16 * 2048, tbl.getMapSize());
+            Assert.assertEquals(5, tbl.getMapRebuildCount());
+        }
+    }
+    
+    @Test
+    public void clearTable() {
+        SegmentTracker tracker = new MemoryStore().getTracker();
+        final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+        List<SegmentId> refs = new ArrayList<SegmentId>();
+        int originalCount = 8;
+        for (int i = 0; i < originalCount; i++) {
+            refs.add(tbl.getSegmentId(i, i % 2));
+        }
+        Assert.assertEquals(originalCount, tbl.getEntryCount());
+        Assert.assertEquals(0, tbl.getMapRebuildCount());
+        
+        tbl.clearSegmentIdTables(new CompactionStrategy(false, false, 
+                CleanupType.CLEAN_NONE, originalCount, (byte) 0) {
+
+            @Override
+            public boolean compacted(@Nonnull Callable<Boolean> setHead)
+                    throws Exception {
+                return true;
+            }
+
+            @Override
+            public boolean canRemove(SegmentId id) {
+                return id.getMostSignificantBits() < 4;
+            }
+            
+        });
+        
+        Assert.assertEquals(4, tbl.getEntryCount());
+
+        for (SegmentId id : refs) {
+            if (id.getMostSignificantBits() >= 4) {
+                SegmentId id2 = tbl.getSegmentId(
+                        id.getMostSignificantBits(),
+                        id.getLeastSignificantBits());
+                List<SegmentId> list = tbl.getRawSegmentIdList();
+                if (list.size() != new HashSet<SegmentId>(list).size()) {
+                    Collections.sort(list);
+                    fail("duplicate entry " + list.toString());
+                }
+                Assert.assertTrue(id == id2);
+            }
+        }
+    }
+    
+    @Test
+    public void justHashCollisions() {
+        SegmentTracker tracker = new MemoryStore().getTracker();
+        final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+        List<SegmentId> refs = new ArrayList<SegmentId>();
+        int originalCount = 1024;
+        for (int i = 0; i < originalCount; i++) {
+            // modulo 128 to ensure we have conflicts
+            refs.add(tbl.getSegmentId(i, i % 128));
+        }
+        Assert.assertEquals(originalCount, tbl.getEntryCount());
+        Assert.assertEquals(1, tbl.getMapRebuildCount());
+        
+        List<SegmentId> refs2 = new ArrayList<SegmentId>();
+        tbl.collectReferencedIds(refs2);
+        Assert.assertEquals(refs.size(), refs2.size());
+
+        Assert.assertEquals(originalCount, tbl.getEntryCount());
+        // we don't expect that there was a refresh, 
+        // because there were just hash collisions
+        Assert.assertEquals(1, tbl.getMapRebuildCount());
+    }
+    
+    @Test
+    public void gc() {
+        SegmentTracker tracker = new MemoryStore().getTracker();
+        final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+        List<SegmentId> refs = new ArrayList<SegmentId>();
+        int originalCount = 1024;
+        for (int i = 0; i < originalCount; i++) {
+            // modulo 128 to ensure we have conflicts
+            refs.add(tbl.getSegmentId(i, i % 128));
+        }
+        Assert.assertEquals(originalCount, tbl.getEntryCount());
+        Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+        for (int i = 0; i < refs.size() / 2; i++) {
+            // we need to remove the first entries,
+            // because if we remove the last entries, then
+            // getSegmentId would not detect that entries were freed up
+            refs.remove(0);
+        }
+        for (int gcCalls = 0;; gcCalls++) {
+            // needed here, so some entries can be garbage collected
+            System.gc();
+            
+            for (SegmentId id : refs) {
+                SegmentId id2 = tbl.getSegmentId(id.getMostSignificantBits(), id.getLeastSignificantBits());
+                Assert.assertTrue(id2 == id);
+            }
+            // because we found each entry, we expect the refresh count is the same
+            Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+            // even thought this does not increase the entry count a lot,
+            // it is supposed to detect that entries were removed,
+            // and force a refresh, which would get rid of the unreferenced ids
+            for (int i = 0; i < 10; i++) {
+                tbl.getSegmentId(i, i);
+            }
+
+            if (tbl.getEntryCount() < originalCount) {
+                break;
+            } else if (gcCalls > 10) {
+                fail("No entries were garbage collected after 10 times System.gc()");
+            }
+        }
+        Assert.assertEquals(2, tbl.getMapRebuildCount());
+    }
+}
\ No newline at end of file