You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2019/03/28 12:49:34 UTC

svn commit: r1856472 - in /jackrabbit/oak/trunk: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/

Author: thomasm
Date: Thu Mar 28 12:49:34 2019
New Revision: 1856472

URL: http://svn.apache.org/viewvc?rev=1856472&view=rev
Log:
OAK-8116 Expose text extraction metrics as sling metrics

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1856472&r1=1856471&r2=1856472&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java Thu Mar 28 12:49:34 2019
@@ -374,7 +374,7 @@ public class LuceneIndexProviderService
         whiteboard = new OsgiWhiteboard(bundleContext);
         threadPoolSize = PropertiesUtil.toInteger(config.get(PROP_THREAD_POOL_SIZE), PROP_THREAD_POOL_SIZE_DEFAULT);
         initializeIndexDir(bundleContext, config);
-        initializeExtractedTextCache(bundleContext, config);
+        initializeExtractedTextCache(bundleContext, config, statisticsProvider);
         tracker = createTracker(bundleContext, config);
         indexProvider = new LuceneIndexProvider(tracker, scorerFactory, augmentorFactory);
         initializeActiveBlobCollector(whiteboard, config);
@@ -680,7 +680,7 @@ public class LuceneIndexProviderService
         log.debug("Lucene46Codec is loaded: {}", ensureLucene46CodecLoaded);
     }
 
-    private void initializeExtractedTextCache(BundleContext bundleContext, Map<String, ?> config) {
+    private void initializeExtractedTextCache(BundleContext bundleContext, Map<String, ?> config, StatisticsProvider statisticsProvider) {
         int cacheSizeInMB = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_SIZE),
                 PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT);
         int cacheExpiryInSecs = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_EXPIRY),
@@ -692,7 +692,7 @@ public class LuceneIndexProviderService
                 cacheSizeInMB * ONE_MB,
                 cacheExpiryInSecs,
                 alwaysUsePreExtractedCache,
-                indexDir);
+                indexDir, statisticsProvider);
         if (extractedTextProvider != null){
             registerExtractedTextProvider(extractedTextProvider);
         }

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java?rev=1856472&r1=1856471&r2=1856472&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java Thu Mar 28 12:49:34 2019
@@ -47,6 +47,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
 import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditor;
+import org.apache.jackrabbit.oak.stats.StatisticsProvider;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
@@ -79,6 +80,7 @@ public class ExtractedTextCache {
     private long totalTextSize;
     private long totalTime;
     private int preFetchedCount;
+    private final StatisticsProvider statisticsProvider;
 
     // the actual cache. key: content id, value: extracted text
     private final Cache<String, String> cache;
@@ -97,6 +99,11 @@ public class ExtractedTextCache {
 
     public ExtractedTextCache(long maxWeight, long expiryTimeInSecs, boolean alwaysUsePreExtractedCache,
                               File indexDir) {
+        this(maxWeight, expiryTimeInSecs, alwaysUsePreExtractedCache, indexDir, null);
+    }
+
+    public ExtractedTextCache(long maxWeight, long expiryTimeInSecs, boolean alwaysUsePreExtractedCache,
+                              File indexDir, StatisticsProvider statisticsProvider) {
         if (maxWeight > 0) {
             cache = CacheBuilder.newBuilder()
                     .weigher(EmpiricalWeigher.INSTANCE)
@@ -114,6 +121,7 @@ public class ExtractedTextCache {
         this.timeoutMap = new ConcurrentHashMap<>();
         this.indexDir = indexDir;
         loadTimeoutMap();
+        this.statisticsProvider = statisticsProvider;
     }
 
     /**
@@ -191,6 +199,10 @@ public class ExtractedTextCache {
         this.totalTextSize += textLength;
     }
 
+    public StatisticsProvider getStatisticsProvider() {
+        return statisticsProvider;
+    }
+
     public TextExtractionStatsMBean getStatsMBean() {
         return new TextExtractionStatsMBean() {
             @Override

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java?rev=1856472&r1=1856471&r2=1856472&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java Thu Mar 28 12:49:34 2019
@@ -40,6 +40,9 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditorContext;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.stats.StatisticsProvider;
+import org.apache.jackrabbit.oak.stats.StatsOptions;
+import org.apache.jackrabbit.oak.stats.TimerStats;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -60,6 +63,7 @@ import static org.apache.jackrabbit.oak.
  *
  */
 public class FulltextBinaryTextExtractor {
+  private final static String TEXT_EXTRACTION_TIMER_METRIC_NAME = "TEXT_EXTRACTION_TIME";
 
   private static final Logger log = LoggerFactory.getLogger(FulltextBinaryTextExtractor.class);
   private static final Parser defaultParser = createDefaultParser();
@@ -122,13 +126,22 @@ public class FulltextBinaryTextExtractor
     return values;
   }
 
-  private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
-    String text = extractedTextCache.get(path, propertyName, v, reindex);
-    if (text == null){
-      text = parseStringValue0(v, metadata, path);
+    private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
+        String text = extractedTextCache.get(path, propertyName, v, reindex);
+        if (text == null) {
+            StatisticsProvider stats = extractedTextCache.getStatisticsProvider();
+            if (stats != null) {
+                TimerStats textExtractionTimerMetricStats = stats
+                        .getTimer(TEXT_EXTRACTION_TIMER_METRIC_NAME, StatsOptions.METRICS_ONLY);
+                TimerStats.Context context = textExtractionTimerMetricStats.time();
+                text = parseStringValue0(v, metadata, path);
+                context.stop();
+            } else {
+                text = parseStringValue0(v, metadata, path);
+            }
+        }
+        return text;
     }
-    return text;
-  }
 
   private String parseStringValue0(Blob v, Metadata metadata, String path) {
     WriteOutContentHandler handler = new WriteOutContentHandler(definition.getMaxExtractLength());