You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by md...@apache.org on 2015/07/15 15:58:49 UTC

svn commit: r1691217 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/segment/ test/java/org/apache/jackrabbit/oak/plugins/segment/

Author: mduerig
Date: Wed Jul 15 13:58:49 2015
New Revision: 1691217

URL: http://svn.apache.org/r1691217
Log:
OAK-3007: SegmentStore cache does not take "string" map into account
Use LIRS cache for string records instead of concurrent hash map

Added:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java
Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1691217&r1=1691216&r2=1691217&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java Wed Jul 15 13:58:49 2015
@@ -36,7 +36,10 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.ConcurrentMap;
 
+import javax.annotation.Nullable;
+
 import com.google.common.base.Charsets;
+import com.google.common.base.Function;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
@@ -123,8 +126,25 @@ public class Segment {
     /**
      * String records read from segment. Used to avoid duplicate
      * copies and repeated parsing of the same strings.
+     *
+     * @deprecated  Superseded by {@link #stringCache} unless
+     * {@link SegmentTracker#DISABLE_STRING_CACHE} is {@code true}.
+     */
+    @Deprecated
+    private final ConcurrentMap<Integer, String> strings;
+
+    private final Function<Integer, String> loadString = new Function<Integer, String>() {
+        @Nullable
+        @Override
+        public String apply(Integer offset) {
+            return loadString(offset);
+        }
+    };
+
+    /**
+     * Cache for string records or {@code null} if {@link #strings} is used for caching
      */
-    private final ConcurrentMap<Integer, String> strings = newConcurrentMap();
+    private final StringCache stringCache;
 
     /**
      * Template records read from segment. Used to avoid duplicate
@@ -132,7 +152,7 @@ public class Segment {
      */
     private final ConcurrentMap<Integer, Template> templates = newConcurrentMap();
 
-    private volatile long accessed = 0;
+    private volatile long accessed;
 
     /**
      * Decode a 4 byte aligned segment offset.
@@ -159,6 +179,13 @@ public class Segment {
     public Segment(SegmentTracker tracker, SegmentId id, ByteBuffer data, SegmentVersion version) {
         this.tracker = checkNotNull(tracker);
         this.id = checkNotNull(id);
+        if (tracker.getStringCache() == null) {
+            strings = newConcurrentMap();
+            stringCache = null;
+        } else {
+            strings = null;
+            stringCache = tracker.getStringCache();
+        }
         this.data = checkNotNull(data);
         if (id.isDataSegmentId()) {
             byte segmentVersion = data.get(3);
@@ -178,6 +205,13 @@ public class Segment {
     Segment(SegmentTracker tracker, byte[] buffer) {
         this.tracker = checkNotNull(tracker);
         this.id = tracker.newDataSegmentId();
+        if (tracker.getStringCache() == null) {
+            strings = newConcurrentMap();
+            stringCache = null;
+        } else {
+            strings = null;
+            stringCache = tracker.getStringCache();
+        }
         this.data = ByteBuffer.wrap(checkNotNull(buffer));
         this.refids = new SegmentId[SEGMENT_REFERENCE_LIMIT + 1];
         this.refids[0] = id;
@@ -367,12 +401,18 @@ public class Segment {
     }
 
     private String readString(int offset) {
-        String string = strings.get(offset);
-        if (string == null) {
-            string = loadString(offset);
-            strings.putIfAbsent(offset, string); // only keep the first copy
+        if (stringCache != null) {
+            long msb = id.getMostSignificantBits();
+            long lsb = id.getLeastSignificantBits();
+            return stringCache.getString(msb, lsb, offset, loadString);
+        } else {
+            String string = strings.get(offset);
+            if (string == null) {
+                string = loadString(offset);
+                strings.putIfAbsent(offset, string); // only keep the first copy
+            }
+            return string;
         }
-        return string;
     }
 
     private String loadString(int offset) {

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java?rev=1691217&r1=1691216&r2=1691217&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java Wed Jul 15 13:58:49 2015
@@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugin
 import static com.google.common.collect.Lists.newLinkedList;
 import static com.google.common.collect.Queues.newArrayDeque;
 import static com.google.common.collect.Sets.newHashSet;
+import static java.lang.Boolean.getBoolean;
 
 import java.security.SecureRandom;
 import java.util.LinkedList;
@@ -45,6 +46,12 @@ public class SegmentTracker {
     private static final Logger log =
             LoggerFactory.getLogger(SegmentTracker.class);
 
+    /**
+     * Disable the {@link #stringCache} if {@code true} and fall back to
+     * the previous {@link Segment#strings} caching mechanism.
+     */
+    private static final boolean DISABLE_STRING_CACHE = getBoolean("oak.segment.disableStringCache");
+
     private static final long MSB_MASK = ~(0xfL << 12);
 
     private static final long VERSION = (0x4L << 12);
@@ -92,6 +99,11 @@ public class SegmentTracker {
 
     private long currentSize;
 
+    /**
+     * Cache for string records
+     */
+    private final StringCache stringCache;
+
     public SegmentTracker(SegmentStore store, int cacheSizeMB,
             SegmentVersion version) {
         for (int i = 0; i < tables.length; i++) {
@@ -103,6 +115,14 @@ public class SegmentTracker {
         this.cacheSize = cacheSizeMB * MB;
         this.compactionMap = new AtomicReference<CompactionMap>(
                 CompactionMap.EMPTY);
+        StringCache c;
+        if (DISABLE_STRING_CACHE) {
+            c = null;
+        } else {
+            int stringCacheSize = (int) Math.min(Integer.MAX_VALUE, cacheSize);
+            c = new StringCache(stringCacheSize);
+        }
+        stringCache = c;
     }
 
     public SegmentTracker(SegmentStore store, SegmentVersion version) {
@@ -122,12 +142,24 @@ public class SegmentTracker {
     }
 
     /**
-     * Clear the segment cache
+     * Clear the caches
      */
     public synchronized void clearCache() {
         segments.clear();
+        if (stringCache != null) {
+            stringCache.clear();
+        }
         currentSize = 0;
     }
+    
+    /**
+     * Get the string cache, if there is one.
+     * 
+     * @return the string cache or {@code null} if none is configured
+     */
+    StringCache getStringCache() {
+        return stringCache;
+    }
 
     Segment getSegment(SegmentId id) {
         try {

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java?rev=1691217&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java Wed Jul 15 13:58:49 2015
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import java.util.Arrays;
+
+import com.google.common.base.Function;
+import org.apache.jackrabbit.oak.cache.CacheLIRS;
+
+/**
+ * A string cache. It has two components: a fast cache for small strings, based
+ * on an array, and a slow cache that uses a LIRS cache.
+ */
+public class StringCache {
+
+    /**
+     * The fast (array based) cache.
+     */
+    private final FastCache fastCache = new FastCache();
+
+    /**
+     * The slower (LIRS) cache.
+     */
+    private final CacheLIRS<StringCacheEntry, String> cache;
+
+    /**
+     * Create a new string cache.
+     *
+     * @param maxSize the maximum memory in bytes.
+     */
+    StringCache(int maxSize) {
+        cache = CacheLIRS.<StringCacheEntry, String>newBuilder()
+            .maximumSize(maxSize)
+            .averageWeight(100)
+            .build();
+    }
+
+    /**
+     * Get the string, loading it if necessary.
+     *
+     * @param msb the msb of the segment
+     * @param lsb the lsb of the segment
+     * @param offset the offset
+     * @param loader the string loader function
+     * @return the string (never null)
+     */
+    public String getString(long msb, long lsb, int offset, Function<Integer, String> loader) {
+        int hash = getEntryHash(msb, lsb, offset);
+        String s = fastCache.getString(hash, msb, lsb, offset);
+        if (s != null) {
+            return s;
+        }
+        StringCacheEntry key = new StringCacheEntry(hash, msb, lsb, offset, null);
+        s = cache.getIfPresent(key);
+        if (s == null) {
+            s = loader.apply(offset);
+            cache.put(key, s, getMemory(s));
+        }
+        if (FastCache.isSmall(s)) {
+            key.setString(s);
+            fastCache.addString(hash, key);
+        }
+        return s;
+    }
+
+    /**
+     * Clear the cache.
+     */
+    public void clear() {
+        cache.invalidateAll();
+        fastCache.clear();
+    }
+
+    private static int getMemory(String s) {
+        return 100 + s.length() * 2;
+    }
+
+    private static int getEntryHash(long lsb, long msb, int offset) {
+        int hash = (int) (msb ^ lsb) + offset;
+        hash = ((hash >>> 16) ^ hash) * 0x45d9f3b;
+        return (hash >>> 16) ^ hash;
+    }
+
+    /**
+     * A fast cache based on an array.
+     */
+    static class FastCache {
+
+        /**
+         * The maximum number of characters in string that are cached.
+         */
+        static final int MAX_STRING_SIZE = 128;
+
+        /**
+         * The number of entries in the cache. Must be a power of 2.
+         */
+        private static final int CACHE_SIZE = 16 * 1024;
+
+        /**
+         * The cache array.
+         */
+        private final StringCacheEntry[] cache = new StringCacheEntry[CACHE_SIZE];
+
+        /**
+         * Get the string if it is stored.
+         *
+         * @param hash the hash
+         * @param msb
+         * @param lsb
+         * @param offset the offset
+         * @return the string, or null
+         */
+        String getString(int hash, long msb, long lsb, int offset) {
+            int index = hash & (CACHE_SIZE - 1);
+            StringCacheEntry e = cache[index];
+            if (e != null && e.matches(msb, lsb, offset)) {
+                return e.string;
+            }
+            return null;
+        }
+
+        void clear() {
+            Arrays.fill(cache, null);
+        }
+
+        /**
+         * Whether the entry is small, in which case it can be kept in the fast cache.
+         * 
+         * @param s the string
+         * @return whether the entry is small
+         */
+        static boolean isSmall(String s) {
+            return s.length() <= MAX_STRING_SIZE;
+        }
+
+        void addString(int hash, StringCacheEntry entry) {
+            int index = hash & (CACHE_SIZE - 1);
+            cache[index] = entry;
+        }
+
+    }
+
+    static class StringCacheEntry {
+        private final int hash;
+        private final long msb, lsb;
+        private final int offset;
+        private String string;
+
+        StringCacheEntry(int hash, long msb, long lsb, int offset, String string) {
+            this.hash = hash;
+            this.msb = msb;
+            this.lsb = lsb;
+            this.offset = offset;
+            this.string = string;
+        }
+
+        void setString(String string) {
+            if (string == null) {
+                throw new NullPointerException();
+            }
+            this.string = string;
+        }
+
+        boolean matches(long msb, long lsb, int offset) {
+            return this.offset == offset && this.msb == msb && this.lsb == lsb;
+        }
+
+        @Override
+        public int hashCode() {
+            return hash;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (!(other instanceof StringCacheEntry)) {
+                return false;
+            }
+            StringCacheEntry o = (StringCacheEntry) other;
+            return o.hash == hash && o.msb == msb && o.lsb == lsb &&
+                    o.offset == offset;
+        }
+
+    }
+
+}
\ No newline at end of file

Added: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java?rev=1691217&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java Wed Jul 15 13:58:49 2015
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import javax.annotation.Nullable;
+
+import org.junit.Test;
+
+import com.google.common.base.Function;
+
+public class StringCacheTest {
+
+    @Test
+    public void empty() {
+        final AtomicInteger counter = new AtomicInteger();
+        Function<Integer, String> loader = new Function<Integer, String>() {
+            @Override @Nullable
+            public String apply(@Nullable Integer input) {
+                counter.incrementAndGet();
+                return "" + input;
+            }
+        };
+        StringCache c = new StringCache(0);
+        for (int repeat = 0; repeat < 10; repeat++) {
+            for (int i = 0; i < 1000; i++) {
+                assertEquals("" + i, c.getString(i, i, i, loader));
+            }
+        }
+        // the LIRS cache should be almost empty (low hit rate there)
+        assertTrue("" + counter, counter.get() > 1000);
+        // but the fast cache should improve the total hit rate
+        assertTrue("" + counter, counter.get() < 5000);
+    }
+    
+    @Test
+    public void largeEntries() {
+        final AtomicInteger counter = new AtomicInteger();
+        final String large = new String(new char[1024]);
+        Function<Integer, String> loader = new Function<Integer, String>() {
+            @Override @Nullable
+            public String apply(@Nullable Integer input) {
+                counter.incrementAndGet();
+                return large + input;
+            }
+        };
+        StringCache c = new StringCache(1024);
+        for (int repeat = 0; repeat < 10; repeat++) {
+            for (int i = 0; i < 1000; i++) {
+                assertEquals(large + i, c.getString(i, i, i, loader));
+                assertEquals(large + 0, c.getString(0, 0, 0, loader));
+            }
+        }
+        // the LIRS cache should be almost empty (low hit rate there)
+        // and large strings are not kept in the fast cache, so hit rate should be bad
+        assertTrue("" + counter, counter.get() > 9000);
+        assertTrue("" + counter, counter.get() < 10000);
+    }
+    
+    @Test
+    public void clear() {
+        final AtomicInteger counter = new AtomicInteger();
+        Function<Integer, String> uniqueLoader = new Function<Integer, String>() {
+            @Override @Nullable
+            public String apply(@Nullable Integer input) {
+                return "" + counter.incrementAndGet();
+            }
+        };
+        StringCache c = new StringCache(0);
+        // load a new entry
+        assertEquals("1", c.getString(0, 0, 0, uniqueLoader));
+        // but only once
+        assertEquals("1", c.getString(0, 0, 0, uniqueLoader));
+        c.clear();
+        // after clearing the cache, load a new entry
+        assertEquals("2", c.getString(0, 0, 0, uniqueLoader));
+        assertEquals("2", c.getString(0, 0, 0, uniqueLoader));
+    }
+    
+    @Test
+    public void randomized() {
+        ArrayList<Function<Integer, String>> loaderList = new ArrayList<Function<Integer, String>>();
+        int segmentCount = 10;
+        for (int i = 0; i < segmentCount; i++) {
+            final int x = i;
+            Function<Integer, String> loader = new Function<Integer, String>() {
+                @Override @Nullable
+                public String apply(@Nullable Integer input) {
+                    return "loader #" + x + " offset " + input;
+                }
+            };
+            loaderList.add(loader);
+        }
+        StringCache c = new StringCache(10);
+        Random r = new Random(1);
+        for (int i = 0; i < 1000; i++) {
+            int segment = r.nextInt(segmentCount);
+            int offset = r.nextInt(10);
+            Function<Integer, String> loader = loaderList.get(segment);
+            String x = c.getString(segment, segment, offset, loader);
+            assertEquals(loader.apply(offset), x);
+        }
+    }
+    
+}