You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/07/17 17:47:05 UTC

[tika] 02/06: TIKA-3137 -- first pass, need to add unit tests for tika-batch

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit db4498d1de534f8348e94b0f27c641353a26b083
Author: tallison <ta...@apache.org>
AuthorDate: Thu Jul 16 15:58:00 2020 -0400

    TIKA-3137 -- first pass, need to add unit tests for tika-batch
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   4 +-
 .../batch/fs/RecursiveParserWrapperFSConsumer.java |   9 +-
 .../tika/batch/fs/StreamOutRPWFSConsumer.java      |  20 ++-
 .../fs/builders/BasicTikaFSConsumersBuilder.java   |  11 +-
 .../RecursiveParserWrapperFSConsumerTest.java      |   5 +-
 .../java/org/apache/tika/config/TikaConfig.java    | 108 ++++++++++++-
 .../metadata/filter/ClearByMimeMetadataFilter.java |  74 +++++++++
 .../metadata/filter/CompositeMetadataFilter.java   |  38 +++++
 .../metadata/filter/DefaultMetadataFilter.java     |  46 ++++++
 .../filter/ExcludeFieldMetadataFilter.java         |  53 +++++++
 .../filter/IncludeFieldMetadataFilter.java         |  58 +++++++
 .../tika/metadata/filter/MetadataFilter.java       |  33 ++++
 .../apache/tika/metadata/filter/NoOpFilter.java    |  34 +++++
 .../tika/sax/RecursiveParserWrapperHandler.java    |  31 +++-
 .../org.apache.tika.metadata.filter.MetadataFilter |  16 ++
 .../org/apache/tika/config/TikaConfigTest.java     |   2 +
 .../tika/metadata/filter/MockUpperCaseFilter.java  |  39 +++++
 .../tika/metadata/filter/TestMetadataFilter.java   | 170 +++++++++++++++++++++
 .../org/apache/tika/config/TIKA-3137-exclude.xml   |  26 ++++
 .../apache/tika/config/TIKA-3137-include-uc.xml    |  27 ++++
 .../org/apache/tika/config/TIKA-3137-include.xml   |  26 ++++
 .../org/apache/tika/config/TIKA-3137-mimes-uc.xml  |  27 ++++
 .../tika/parser/RecursiveParserWrapperTest.java    |  43 ++++++
 .../org/apache/tika/parser/TIKA-3137-include.xml   |  31 ++++
 .../server/resource/RecursiveMetadataResource.java |   3 +-
 .../java/org/apache/tika/server/CXFTestBase.java   |   7 +-
 .../tika/server/RecursiveMetadataFilterTest.java   | 107 +++++++++++++
 .../org/apache/tika/server/TIKA-3137-include.xml   |  31 ++++
 28 files changed, 1062 insertions(+), 17 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 8077114..46f82ee 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -513,7 +513,9 @@ public class TikaCLI {
     private void handleRecursiveJson(URL url, OutputStream output) throws IOException, SAXException, TikaException {
         Metadata metadata = new Metadata();
         RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser);
-        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(getContentHandlerFactory(type), -1);
+        RecursiveParserWrapperHandler handler =
+                new RecursiveParserWrapperHandler(getContentHandlerFactory(type),
+                        -1, config.getMetadataFilter());
         try (InputStream input = TikaInputStream.get(url, metadata)) {
             wrapper.parse(input, handler, metadata, context);
         }
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
index 56b8b58..9732781 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
@@ -32,6 +32,8 @@ import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.metadata.serialization.JsonMetadataList;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -50,6 +52,7 @@ public class RecursiveParserWrapperFSConsumer extends AbstractFSConsumer {
     private final Parser parser;
     private final ContentHandlerFactory contentHandlerFactory;
     private final OutputStreamFactory fsOSFactory;
+    private final MetadataFilter metadataFilter;
     private String outputEncoding = "UTF-8";
 
     /**
@@ -62,11 +65,12 @@ public class RecursiveParserWrapperFSConsumer extends AbstractFSConsumer {
     public RecursiveParserWrapperFSConsumer(ArrayBlockingQueue<FileResource> queue,
                                             Parser parser,
                                             ContentHandlerFactory contentHandlerFactory,
-                                            OutputStreamFactory fsOSFactory) {
+                                            OutputStreamFactory fsOSFactory, MetadataFilter metadataFilter) {
         super(queue);
         this.contentHandlerFactory = contentHandlerFactory;
         this.fsOSFactory = fsOSFactory;
         this.parser = parser;
+        this.metadataFilter = metadataFilter;
     }
 
     @Override
@@ -95,7 +99,8 @@ public class RecursiveParserWrapperFSConsumer extends AbstractFSConsumer {
         Throwable thrown = null;
         List<Metadata> metadataList = null;
         Metadata containerMetadata = fileResource.getMetadata();
-        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory, -1);
+        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory,
+                -1, metadataFilter);
         try {
             parse(fileResource.getResourceId(), parser, is, handler,
                     containerMetadata, context);
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java
index 018c1a9..dd39a6c 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java
@@ -20,12 +20,15 @@ package org.apache.tika.batch.fs;
 
 
 import org.apache.commons.io.IOUtils;
+import org.apache.tika.Tika;
 import org.apache.tika.batch.FileResource;
 import org.apache.tika.batch.OutputStreamFactory;
 import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.metadata.serialization.JsonStreamingSerializer;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -53,17 +56,19 @@ public class StreamOutRPWFSConsumer extends AbstractFSConsumer {
     private final Parser parser;
     private final ContentHandlerFactory contentHandlerFactory;
     private final OutputStreamFactory fsOSFactory;
+    private final MetadataFilter metadataFilter;
     private String outputEncoding = "UTF-8";
 
 
     public StreamOutRPWFSConsumer(ArrayBlockingQueue<FileResource> queue,
                                   Parser parser,
                                   ContentHandlerFactory contentHandlerFactory,
-                                  OutputStreamFactory fsOSFactory) {
+                                  OutputStreamFactory fsOSFactory, MetadataFilter metadataFilter) {
         super(queue);
         this.contentHandlerFactory = contentHandlerFactory;
         this.fsOSFactory = fsOSFactory;
         this.parser = parser;
+        this.metadataFilter = metadataFilter;
     }
 
     @Override
@@ -93,7 +98,8 @@ public class StreamOutRPWFSConsumer extends AbstractFSConsumer {
         JsonStreamingSerializer writer = new JsonStreamingSerializer(
                 new OutputStreamWriter(os, StandardCharsets.UTF_8));
 
-        WriteoutRPWHandler handler = new WriteoutRPWHandler(contentHandlerFactory, writer);
+        WriteoutRPWHandler handler = new WriteoutRPWHandler(contentHandlerFactory,
+                writer, metadataFilter);
         Throwable thrown = null;
         try {
             parse(fileResource.getResourceId(), parser, is, handler,
@@ -137,16 +143,24 @@ public class StreamOutRPWFSConsumer extends AbstractFSConsumer {
     //be written straight to disk.
     private class WriteoutRPWHandler extends AbstractRecursiveParserWrapperHandler {
         private final JsonStreamingSerializer jsonWriter;
+        private final MetadataFilter metadataFilter;
 
-        public WriteoutRPWHandler(ContentHandlerFactory contentHandlerFactory, JsonStreamingSerializer writer) {
+        public WriteoutRPWHandler(ContentHandlerFactory contentHandlerFactory, JsonStreamingSerializer writer,
+                                  MetadataFilter metadataFilter) {
             super(contentHandlerFactory);
             this.jsonWriter = writer;
+            this.metadataFilter = metadataFilter;
         }
 
         @Override
         public void endEmbeddedDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
             metadata.add(RecursiveParserWrapperHandler.TIKA_CONTENT, contentHandler.toString());
             try {
+                metadataFilter.filter(metadata);
+            } catch (TikaException e) {
+                throw new SAXException(e);
+            }
+            try {
                 jsonWriter.add(metadata);
             } catch (IOException e) {
                 throw new SAXException(e);
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
index 88171ee..4f05324 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
@@ -42,6 +42,9 @@ import org.apache.tika.batch.fs.FSUtil;
 import org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer;
 import org.apache.tika.batch.fs.StreamOutRPWFSConsumer;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BasicContentHandlerFactory;
@@ -145,15 +148,19 @@ public class BasicTikaFSConsumersBuilder extends AbstractConsumersBuilder {
                 contentHandlerFactory, recursiveParserWrapper);
         Parser parser = parserFactory.getParser(config);
         if (recursiveParserWrapper) {
+            MetadataFilter metadataFilter = config.getMetadataFilter();
             parser = new RecursiveParserWrapper(parser);
+
             for (int i = 0; i < numConsumers; i++) {
                 FileResourceConsumer c = null;
                 if (streamOut){
                     c = new StreamOutRPWFSConsumer(queue,
-                            parser, contentHandlerFactory, outputStreamFactory);
+                            parser, contentHandlerFactory,
+                            outputStreamFactory, metadataFilter);
                 } else {
                     c = new RecursiveParserWrapperFSConsumer(queue,
-                            parser, contentHandlerFactory, outputStreamFactory);
+                            parser, contentHandlerFactory,
+                            outputStreamFactory, metadataFilter);
                 }
                 consumers.add(c);
             }
diff --git a/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java b/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
index 7ebe564..6a61414 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
@@ -34,6 +34,7 @@ import org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.metadata.serialization.JsonMetadataList;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
@@ -75,7 +76,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
         Parser p = new RecursiveParserWrapper(new AutoDetectParserFactory().getParser(new TikaConfig()));
         RecursiveParserWrapperFSConsumer consumer = new RecursiveParserWrapperFSConsumer(
                 queue, p, new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                mockOSFactory);
+                mockOSFactory, NoOpFilter.NOOP_FILTER);
 
         IFileProcessorFutureResult result = consumer.call();
         mockOSFactory.getStreams().get(0).flush();
@@ -123,7 +124,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
         Parser p = new RecursiveParserWrapper(new AutoDetectParserFactory().getParser(new TikaConfig()));
         RecursiveParserWrapperFSConsumer consumer = new RecursiveParserWrapperFSConsumer(
                 queue, p, new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                mockOSFactory);
+                mockOSFactory, NoOpFilter.NOOP_FILTER);
 
         IFileProcessorFutureResult result = consumer.call();
         mockOSFactory.getStreams().get(0).flush();
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 92485d3..a0cc102 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -50,6 +50,9 @@ import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.language.translate.DefaultTranslator;
 import org.apache.tika.language.translate.Translator;
+import org.apache.tika.metadata.filter.CompositeMetadataFilter;
+import org.apache.tika.metadata.filter.DefaultMetadataFilter;
+import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeTypeException;
@@ -104,6 +107,10 @@ public class TikaConfig {
         return new SimpleThreadPoolExecutor();
     }
 
+    private static MetadataFilter getDefaultMetadataFilter(ServiceLoader loader) {
+        return new DefaultMetadataFilter(loader); // composite over the MetadataFilters obtained from the given service loader
+    }
+
     //use this to look for unneeded instantiations of TikaConfig
     protected static AtomicInteger TIMES_INSTANTIATED = new AtomicInteger();
 
@@ -115,6 +122,7 @@ public class TikaConfig {
     private final MimeTypes mimeTypes;
     private final ExecutorService executorService;
     private final EncodingDetector encodingDetector;
+    private final MetadataFilter metadataFilter;
 
     public TikaConfig(String file)
             throws TikaException, IOException, SAXException {
@@ -180,6 +188,7 @@ public class TikaConfig {
         TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
         EncodingDetectorXmlLoader encodingDetectorXmlLoader = new EncodingDetectorXmlLoader();
+        MetadataFilterXmlLoader metadataFilterXmlLoader = new MetadataFilterXmlLoader();
         updateXMLReaderUtils(element);
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
@@ -189,6 +198,7 @@ public class TikaConfig {
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
         this.executorService = executorLoader.loadOverall(element, mimeTypes, loader);
+        this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, loader);
         this.serviceLoader = loader;
         TIMES_INSTANTIATED.incrementAndGet();
     }
@@ -214,6 +224,7 @@ public class TikaConfig {
         this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
         this.translator = getDefaultTranslator(serviceLoader);
         this.executorService = getDefaultExecutorService();
+        this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
         TIMES_INSTANTIATED.incrementAndGet();
     }
 
@@ -249,6 +260,7 @@ public class TikaConfig {
             this.detector = getDefaultDetector(mimeTypes, serviceLoader);
             this.translator = getDefaultTranslator(serviceLoader);
             this.executorService = getDefaultExecutorService();
+            this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
         } else {
             ServiceLoader tmpServiceLoader = new ServiceLoader();
             try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) {
@@ -259,7 +271,8 @@ public class TikaConfig {
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
                 TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
                 ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
-                
+                MetadataFilterXmlLoader metadataFilterXmlLoader = new MetadataFilterXmlLoader();
+
                 this.mimeTypes = typesFromDomElement(element);
                 this.encodingDetector = encodingDetectorLoader.loadOverall(element, mimeTypes, serviceLoader);
 
@@ -269,6 +282,7 @@ public class TikaConfig {
                 this.detector = detectorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.executorService = executorLoader.loadOverall(element, mimeTypes, serviceLoader);
+                this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, serviceLoader);
             } catch (SAXException e) {
                 throw new TikaException(
                         "Specified Tika configuration has syntax errors: "
@@ -393,6 +407,9 @@ public class TikaConfig {
         return serviceLoader;
     }
 
+    public MetadataFilter getMetadataFilter() {
+        return metadataFilter; // final field: either loaded from the config XML or the service-loader default
+    }
     /**
      * Provides a default configuration (TikaConfig).  Currently creates a
      * new instance each time it's called; we may be able to have it
@@ -1101,7 +1118,8 @@ public class TikaConfig {
         }
 
         @Override
-        CompositeEncodingDetector createComposite(List<EncodingDetector> encodingDetectors, MimeTypes mimeTypes, ServiceLoader loader) {
+        CompositeEncodingDetector createComposite(List<EncodingDetector> encodingDetectors,
+                                                  MimeTypes mimeTypes, ServiceLoader loader) {
             return new CompositeEncodingDetector(encodingDetectors);
         }
 
@@ -1142,5 +1160,91 @@ public class TikaConfig {
         }
     }
 
+    private static class MetadataFilterXmlLoader extends
+            XmlLoader<MetadataFilter, MetadataFilter> {
+        // Loader for the "metadataFilters" parent tag and its "metadataFilter" children.
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "metadataFilters";
+        }
+
+        String getLoaderTagName() {
+            return "metadataFilter";
+        }
+
+        @Override
+        Class<? extends MetadataFilter> getLoaderClass() {
+            return MetadataFilter.class;
+        }
+
+
+        @Override
+        boolean isComposite(MetadataFilter loaded) {
+            return loaded instanceof CompositeMetadataFilter;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends MetadataFilter> loadedClass) {
+            return CompositeMetadataFilter.class.isAssignableFrom(loadedClass);
+        }
+
+        @Override
+        MetadataFilter preLoadOne(Class<? extends MetadataFilter> loadedClass,
+                                    String classname, MimeTypes mimeTypes) throws TikaException {
+            // Check for classes which can't be set in config
+            // Continue with normal loading
+            return null;
+        }
+
+        @Override
+        MetadataFilter createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultMetadataFilter(loader);
+        }
+
+        //this ignores the service loader
+        @Override
+        MetadataFilter createComposite(List<MetadataFilter> loaded, MimeTypes mimeTypes, ServiceLoader loader) {
+            return new DefaultMetadataFilter(loaded);
+        }
+
+        /**
+         * Reflectively builds the composite class named in the config.  The public
+         * (ServiceLoader, Collection) constructor is tried first, then (List).
+         * A class need not declare both, so a missing constructor is expected
+         * and is deliberately not reported.
+         *
+         * @return the new filter, or null when neither constructor is declared
+         */
+        @Override
+        MetadataFilter createComposite(Class<? extends MetadataFilter> metadataFilterClass,
+                                         List<MetadataFilter> childMetadataFilters,
+                                         Set<Class<? extends MetadataFilter>> excludeFilters,
+                                         Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException,
+                InstantiationException {
+            try {
+                return metadataFilterClass.getConstructor(ServiceLoader.class, Collection.class)
+                        .newInstance(loader, excludeFilters);
+            } catch (NoSuchMethodException ignored) {
+                // fall through to the child-list constructor
+            }
+
+            try {
+                return metadataFilterClass.getConstructor(List.class)
+                        .newInstance(childMetadataFilters);
+            } catch (NoSuchMethodException ignored) {
+                // no usable constructor; null is what this method has always returned here
+                return null;
+            }
+        }
+
+        @Override
+        MetadataFilter decorate(MetadataFilter created, Element element) {
+            return created; // No decoration of MetadataFilters
+        }
+    }
 
 }
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
new file mode 100644
index 0000000..05324f2
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * This class clears the entire metadata object if the
+ * mime matches the mime filter.  The idea is that you might not want
+ * to store/transmit metadata for images or specific file types.
+ */
+public class ClearByMimeMetadataFilter implements MetadataFilter {
+    private final Set<String> mimes;
+
+    public ClearByMimeMetadataFilter() {
+        this(new HashSet<>());
+    }
+
+    public ClearByMimeMetadataFilter(Set<String> mimes) {
+        this.mimes = new HashSet<>(mimes); // copy, so setMimes never mutates (or fails on) the caller's set
+    }
+
+    /** Removes every field from the metadata when the base type of its Content-Type is a configured mime. */
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        String mimeString = metadata.get(Metadata.CONTENT_TYPE);
+        if (mimeString == null) {
+            return;
+        }
+        MediaType mt = MediaType.parse(mimeString);
+        // a Content-Type that MediaType cannot parse is compared as the raw string
+        String baseType = (mt == null) ? mimeString : mt.getBaseType().toString();
+        if (mimes.contains(baseType)) {
+            for (String n : metadata.names()) {
+                metadata.remove(n);
+            }
+        }
+    }
+
+    /**
+     * @param mimesString comma-delimited list of mimes that will trigger complete removal of metadata;
+     *                    whitespace around an entry is ignored and empty entries are skipped
+     */
+    @Field
+    public void setMimes(String mimesString) {
+        for (String mime : mimesString.split(",")) {
+            if (!mime.trim().isEmpty()) {
+                mimes.add(mime.trim());
+            }
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
new file mode 100644
index 0000000..4d592c9
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.List;
+
+public class CompositeMetadataFilter implements MetadataFilter {
+    private final List<MetadataFilter> filters; // delegates, applied in the list's iteration order
+
+    public CompositeMetadataFilter(List<MetadataFilter> filters) {
+        this.filters = filters;
+    }
+
+    /** Hands the same metadata object to each delegate in turn; a TikaException from any delegate propagates. */
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (MetadataFilter delegate : filters) {
+            delegate.filter(metadata);
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/DefaultMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/DefaultMetadataFilter.java
new file mode 100644
index 0000000..7671f50
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/DefaultMetadataFilter.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+import java.util.List;
+
+public class DefaultMetadataFilter extends CompositeMetadataFilter {
+
+    // Asks the service loader for its static MetadataFilter providers and
+    // returns them after passing the list through ServiceLoaderUtils.sortLoadedClasses.
+    private static List<MetadataFilter> getDefaultFilters(ServiceLoader loader) {
+        List<MetadataFilter> filters = loader.loadStaticServiceProviders(MetadataFilter.class);
+        ServiceLoaderUtils.sortLoadedClasses(filters);
+        return filters;
+    }
+
+    public DefaultMetadataFilter(ServiceLoader serviceLoader) {
+        super(getDefaultFilters(serviceLoader));
+    }
+
+    public DefaultMetadataFilter(List<MetadataFilter> metadataFilters) {
+        super(metadataFilters);
+    }
+
+    public DefaultMetadataFilter() {
+        this(new ServiceLoader());
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
new file mode 100644
index 0000000..3b6e2a0
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class ExcludeFieldMetadataFilter implements MetadataFilter {
+    private final Set<String> exclude;
+
+    public ExcludeFieldMetadataFilter() {
+        this(new HashSet<>());
+    }
+    public ExcludeFieldMetadataFilter(Set<String> exclude) {
+        this.exclude = new HashSet<>(exclude); // copy, so setExclude never mutates (or fails on) the caller's set
+    }
+
+    /** Removes each configured field name from the metadata. */
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (String field : exclude) {
+            metadata.remove(field);
+        }
+    }
+
+    /** @param excludeString comma-delimited list of fields to exclude; entries are trimmed, empty ones skipped */
+    @Field
+    public void setExclude(String excludeString) {
+        for (String field : excludeString.split(",")) {
+            if (!field.trim().isEmpty()) {
+                exclude.add(field.trim());
+            }
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
new file mode 100644
index 0000000..4bc6c9e
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Keeps only a configured set of fields in a {@link Metadata} object;
+ * every field whose name is not in the include set is removed.
+ */
+public class IncludeFieldMetadataFilter implements MetadataFilter {
+    private final Set<String> includeSet;
+
+    /**
+     * Creates a filter with an empty include set; fields can be added
+     * afterwards via {@link #setInclude(String)}.
+     */
+    public IncludeFieldMetadataFilter() {
+        this.includeSet = new HashSet<>();
+    }
+
+    /**
+     * @param fields names of the fields to keep; must not be null.  The set is
+     *               copied, so later changes to the caller's set do not affect
+     *               this filter
+     */
+    public IncludeFieldMetadataFilter(Set<String> fields) {
+        //copy so that setInclude() never writes into (or fails on) a caller-owned set
+        this.includeSet = new HashSet<>(fields);
+    }
+
+    /**
+     * Adds fields to the include set.  Whitespace around each entry is
+     * ignored and empty entries are skipped, so "title, author" and
+     * "title,author" are equivalent.
+     *
+     * @param includeString comma-delimited list of fields to include
+     */
+    @Field
+    public void setInclude(String includeString) {
+        for (String field : includeString.split(",")) {
+            String trimmed = field.trim();
+            if (!trimmed.isEmpty()) {
+                includeSet.add(trimmed);
+            }
+        }
+    }
+
+    /**
+     * Removes, in place, every field whose name is not in the include set.
+     *
+     * @param metadata metadata to modify
+     */
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (String name : metadata.names()) {
+            if (!includeSet.contains(name)) {
+                metadata.remove(name);
+            }
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
new file mode 100644
index 0000000..7a8f345
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.Serializable;
+
+/**
+ * Filters the metadata in place
+ * <p>
+ * Implementations change the {@link Metadata} object that is passed in
+ * (for example by removing fields or rewriting values); nothing is returned.
+ * Filters are {@link Serializable}.
+ *
+ * @since Apache Tika 1.25
+ */
+public interface MetadataFilter extends Serializable {
+
+    /**
+     * Applies this filter to the given metadata object, modifying it directly.
+     *
+     * @param metadata the metadata to filter in place
+     * @throws TikaException declared so that implementations can report a
+     *                       failure while filtering
+     */
+    void filter(Metadata metadata) throws TikaException;
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
new file mode 100644
index 0000000..9cd1ec3
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * This filter performs no operations on the metadata
+ * and leaves it untouched.
+ */
+public class NoOpFilter implements MetadataFilter {
+
+    /**
+     * Shared instance.  The filter has no fields, so one instance can be
+     * reused everywhere; it is final so that the shared default cannot be
+     * swapped out by other code.
+     */
+    public static final NoOpFilter NOOP_FILTER = new NoOpFilter();
+
+    /**
+     * Does nothing; the metadata is left exactly as it was passed in.
+     *
+     * @param metadata ignored
+     */
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        //no op
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
index 408598f..50f0fb8 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
@@ -16,7 +16,10 @@
  */
 package org.apache.tika.sax;
 
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.utils.ParserUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -40,12 +43,13 @@ import java.util.List;
 public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrapperHandler {
 
     protected final List<Metadata> metadataList = new LinkedList<>();
+    private final MetadataFilter metadataFilter;
 
     /**
      * Create a handler with no limit on the number of embedded resources
      */
     public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory) {
-        super(contentHandlerFactory);
+        this(contentHandlerFactory, -1, NoOpFilter.NOOP_FILTER);
     }
 
     /**
@@ -54,7 +58,13 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
      * @param maxEmbeddedResources number of embedded resources that will be parsed
      */
     public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory, int maxEmbeddedResources) {
+        this(contentHandlerFactory, maxEmbeddedResources, NoOpFilter.NOOP_FILTER);
+    }
+
+    /**
+     * Create a handler that runs a metadata filter over each metadata object
+     * in endEmbeddedDocument and endDocument before it is added to the
+     * metadata list.  A metadata object that is empty after filtering is
+     * not added to the list.
+     *
+     * @param contentHandlerFactory factory passed on to the parent handler
+     * @param maxEmbeddedResources number of embedded resources that will be parsed
+     * @param metadataFilter filter applied in place to each metadata object
+     */
+    public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory, int maxEmbeddedResources,
+                                         MetadataFilter metadataFilter) {
         super(contentHandlerFactory, maxEmbeddedResources);
+        this.metadataFilter = metadataFilter;
     }
 
     /**
@@ -79,7 +89,15 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
     public void endEmbeddedDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
         super.endEmbeddedDocument(contentHandler, metadata);
         addContent(contentHandler, metadata);
-        metadataList.add(ParserUtils.cloneMetadata(metadata));
+        try {
+            metadataFilter.filter(metadata);
+        } catch (TikaException e) {
+            throw new SAXException(e);
+        }
+
+        if (metadata.size() > 0) {
+            metadataList.add(ParserUtils.cloneMetadata(metadata));
+        }
     }
 
     /**
@@ -92,8 +110,15 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
     public void endDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
         super.endDocument(contentHandler, metadata);
         addContent(contentHandler, metadata);
+        try {
+            metadataFilter.filter(metadata);
+        } catch (TikaException e) {
+            throw new SAXException(e);
+        }
 
-        metadataList.add(0, ParserUtils.cloneMetadata(metadata));
+        if (metadata.size() > 0) {
+            metadataList.add(0, ParserUtils.cloneMetadata(metadata));
+        }
     }
 
     /**
diff --git a/tika-core/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter b/tika-core/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
new file mode 100644
index 0000000..604a480
--- /dev/null
+++ b/tika-core/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.metadata.filter.NoOpFilter
\ No newline at end of file
diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
index 5c406cd..1b8722d 100644
--- a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
+++ b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
@@ -327,4 +327,6 @@ public class TikaConfigTest extends AbstractTikaConfigTest {
         getConfig("TIKA-2732-xmlreaderutils-exc.xml");
     }
 
+
+
 }
\ No newline at end of file
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
new file mode 100644
index 0000000..0632dd4
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.Locale;
+
+/**
+ * Test-only filter that replaces every metadata value with its
+ * upper-cased form (using {@link Locale#US}); field names are unchanged.
+ */
+public class MockUpperCaseFilter implements MetadataFilter {
+
+    /**
+     * Upper-cases all values of all fields in place.
+     */
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (String name : metadata.names()) {
+            //grab the current values first, then drop the field and re-add
+            //each value in upper case
+            String[] originalValues = metadata.getValues(name);
+            metadata.remove(name);
+            for (String value : originalValues) {
+                metadata.add(name, value.toUpperCase(Locale.US));
+            }
+        }
+    }
+}
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
new file mode 100644
index 0000000..e933d0c
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.AbstractTikaConfigTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.junit.Test;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Tests for the {@link MetadataFilter} implementations, both when they are
+ * constructed directly and when they are loaded from a tika-config file.
+ */
+public class TestMetadataFilter extends AbstractTikaConfigTest {
+
+    /**
+     * The default filter should leave the metadata untouched.
+     */
+    @Test
+    public void testDefault() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+
+        MetadataFilter defaultFilter = new DefaultMetadataFilter();
+        defaultFilter.filter(metadata);
+
+        assertEquals(2, metadata.names().length);
+        assertEquals("title", metadata.get("title"));
+        assertEquals("author", metadata.get("author"));
+    }
+
+    /**
+     * Only the included field should survive.
+     */
+    @Test
+    public void testIncludeFilter() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+
+        MetadataFilter filter = new IncludeFieldMetadataFilter(set("title"));
+        filter.filter(metadata);
+        assertEquals(1, metadata.names().length);
+        assertEquals("title", metadata.get("title"));
+        assertNull(metadata.get("author"));
+    }
+
+    /**
+     * Only the excluded field should be removed.
+     */
+    @Test
+    public void testExcludeFilter() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+
+        MetadataFilter filter = new ExcludeFieldMetadataFilter(set("title"));
+        filter.filter(metadata);
+        assertEquals(1, metadata.names().length);
+        assertEquals("author", metadata.get("author"));
+        assertNull(metadata.get("title"));
+    }
+
+    /**
+     * Include filter loaded from config: "title" and "author" are kept,
+     * everything else is dropped.
+     */
+    @Test
+    public void testConfigIncludeFilter() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-include.xml");
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+        metadata.set("content", "content");
+
+        config.getMetadataFilter().filter(metadata);
+
+        assertEquals(2, metadata.size());
+        assertEquals("title", metadata.get("title"));
+        assertEquals("author", metadata.get("author"));
+        assertNull(metadata.get("content"));
+    }
+
+    /**
+     * Exclude filter loaded from config: "title" and "author" are dropped,
+     * everything else is kept.
+     */
+    @Test
+    public void testConfigExcludeFilter() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-exclude.xml");
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+        metadata.set("content", "content");
+
+        config.getMetadataFilter().filter(metadata);
+
+        assertEquals(1, metadata.size());
+        assertEquals("content", metadata.get("content"));
+        assertNull(metadata.get("title"));
+        assertNull(metadata.get("author"));
+    }
+
+    /**
+     * Two filters chained in config: the include filter runs first, then the
+     * surviving values are upper-cased by the mock filter.
+     */
+    @Test
+    public void testConfigIncludeAndUCFilter() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-include-uc.xml");
+        String[] expectedTitles = new String[]{
+                "TITLE1", "TITLE2", "TITLE3"
+        };
+        Metadata metadata = new Metadata();
+        metadata.add("title", "title1");
+        metadata.add("title", "title2");
+        metadata.add("title", "title3");
+        metadata.set("author", "author");
+        metadata.set("content", "content");
+
+        config.getMetadataFilter().filter(metadata);
+
+        assertEquals(2, metadata.size());
+        assertArrayEquals(expectedTitles, metadata.getValues("title"));
+        assertEquals("AUTHOR", metadata.get("author"));
+        assertNull(metadata.get("content"));
+    }
+
+    /**
+     * Metadata whose content type is in the configured mime set is emptied;
+     * metadata with any other content type is left alone.
+     */
+    @Test
+    public void testMimeClearingFilter() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.image("jpeg").toString());
+        metadata.set("author", "author");
+
+        MetadataFilter filter = new ClearByMimeMetadataFilter(set("image/jpeg","application/pdf"));
+        filter.filter(metadata);
+        assertEquals(0, metadata.size());
+
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.text("plain").toString());
+        metadata.set("author", "author");
+        filter.filter(metadata);
+        assertEquals(2, metadata.size());
+        assertEquals("author", metadata.get("author"));
+
+    }
+
+    /**
+     * Same as {@link #testMimeClearingFilter()}, but with the filters loaded
+     * from config and followed by the upper-casing mock filter.
+     */
+    @Test
+    public void testMimeClearingFilterConfig() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-mimes-uc.xml");
+
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.image("jpeg").toString());
+        metadata.set("author", "author");
+
+        MetadataFilter filter = config.getMetadataFilter();
+        filter.filter(metadata);
+        assertEquals(0, metadata.size());
+
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.text("plain").toString());
+        metadata.set("author", "author");
+        filter.filter(metadata);
+        assertEquals(2, metadata.size());
+        assertEquals("AUTHOR", metadata.get("author"));
+
+    }
+
+    /**
+     * Builds a mutable set from the given items.
+     */
+    private static Set<String> set(String ... items) {
+        Set<String> set = new HashSet<>();
+        for (String item : items) {
+            set.add(item);
+        }
+        return set;
+    }
+}
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
new file mode 100644
index 0000000..27517f6
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter">
+      <params>
+        <param name="exclude" type="string">title,author</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
new file mode 100644
index 0000000..e0df476
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">title,author</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
+  </metadataFilters>
+</properties>
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
new file mode 100644
index 0000000..e92dff8
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">title,author</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
new file mode 100644
index 0000000..486280c
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
+      <params>
+        <param name="mimes" type="string">image/jpeg,application/pdf</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
+  </metadataFilters>
+</properties>
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index a5182c6..349f271 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -21,6 +21,7 @@ package org.apache.tika.parser;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -30,6 +31,7 @@ import java.util.Set;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.ClosedInputStream;
 import org.apache.tika.io.ProxyInputStream;
@@ -365,6 +367,47 @@ public class RecursiveParserWrapperTest extends TikaTest {
 
     }
 
+    /**
+     * TIKA-3137: the metadata filter loaded from a tika-config is handed to the
+     * {@link RecursiveParserWrapperHandler}, and only the expected keys and
+     * content types should remain in the collected metadata list.
+     */
+    @Test
+    public void testIncludeFilter() throws Exception {
+        ParseContext context = new ParseContext();
+        Metadata metadata = new Metadata();
+        TikaConfig tikaConfig;
+        //close the config stream explicitly, as is done for the test document below
+        try (InputStream configStream = getClass().getResourceAsStream("TIKA-3137-include.xml")) {
+            tikaConfig = new TikaConfig(configStream);
+        }
+        Parser p = new AutoDetectParser(tikaConfig);
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p, true);
+        String path = "/test-documents/test_recursive_embedded.docx";
+        ContentHandlerFactory contentHandlerFactory =
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
+                        -1);
+
+        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory,
+                -1, tikaConfig.getMetadataFilter());
+        try (InputStream is = getClass().getResourceAsStream(path)) {
+            wrapper.parse(is, handler, metadata, context);
+        }
+        List<Metadata> metadataList = handler.getMetadataList();
+        assertEquals(5, metadataList.size());
+
+        Set<String> expectedKeys = new HashSet<>();
+        expectedKeys.add("X-TIKA:content");
+        expectedKeys.add("extended-properties:Application");
+        expectedKeys.add("Content-Type");
+        for (Metadata m : metadataList) {
+            String contentType = m.get(Metadata.CONTENT_TYPE);
+            //fail with a message instead of a bare NullPointerException
+            if (contentType == null) {
+                fail("Content-Type should not have been filtered out");
+            }
+            if (contentType.equals("image/emf")) {
+                fail("emf should have been filtered out");
+            }
+            if (contentType.startsWith("text/plain")) {
+                fail("text/plain should have been filtered out");
+            }
+            assertTrue(m.names().length >= 2);
+            for (String n : m.names()) {
+                if (! expectedKeys.contains(n)) {
+                    fail("didn't expect "+n);
+                }
+            }
+        }
+    }
+
     private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory,
                                        boolean catchEmbeddedExceptions,
                                        DigestingParser.Digester digester) throws Exception {
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
new file mode 100644
index 0000000..765bc11
--- /dev/null
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">X-TIKA:content,extended-properties:Application,Content-Type</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
+      <params>
+        <param name="mimes" type="string">image/emf,text/plain</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index 07d20c5..71e7180 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -152,7 +152,8 @@ public class RecursiveMetadataResource {
         BasicContentHandlerFactory.HANDLER_TYPE type =
                 BasicContentHandlerFactory.parseHandlerType(handlerTypeName, DEFAULT_HANDLER_TYPE);
 		RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
-		        new BasicContentHandlerFactory(type, writeLimit), maxEmbeddedResources);
+		        new BasicContentHandlerFactory(type, writeLimit), maxEmbeddedResources,
+                TikaResource.getConfig().getMetadataFilter());
 		try {
             TikaResource.parse(wrapper, LOG, info.getPath(), is, handler, metadata, context);
         } catch (SecurityException e) {
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
index 92c9d34..8b5f153 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
@@ -90,7 +90,8 @@ public abstract class CXFTestBase {
 
     @Before
     public void setUp() throws Exception {
-        this.tika = new TikaConfig(getClass().getResourceAsStream("tika-config-for-server-tests.xml"));
+
+        this.tika = new TikaConfig(getTikaConfigInputStream());
         TikaResource.init(tika,
                 new CommonsDigester(DIGESTER_READ_LIMIT, "md5,sha1:32"),
                 new DefaultInputStreamFactory(), new ServerStatus(true));
@@ -120,6 +121,10 @@ public abstract class CXFTestBase {
         server = sf.create();
     }
 
+    /**
+     * Opens the tika-config that {@link #setUp()} uses to build the
+     * {@link TikaConfig}.  The resource is resolved relative to the
+     * runtime class of the test.
+     *
+     * @return stream over the config resource
+     */
+    protected InputStream getTikaConfigInputStream() {
+        final String configResource = "tika-config-for-server-tests.xml";
+        return getClass().getResourceAsStream(configResource);
+    }
+
     /**
      * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)}
      * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)}
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java
new file mode 100644
index 0000000..748ee77
--- /dev/null
+++ b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
+import org.apache.tika.sax.RecursiveParserWrapperHandler;
+import org.apache.tika.server.resource.RecursiveMetadataResource;
+import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
+import org.junit.Test;
+
+import javax.ws.rs.core.Response;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.tika.TikaTest.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Verifies that the metadata filters declared in TIKA-3137-include.xml are
+ * applied to the list of metadata objects returned by the /rmeta endpoint.
+ */
+public class RecursiveMetadataFilterTest extends CXFTestBase {
+
+    private static final String META_PATH = "/rmeta";
+
+    private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
+
+    /**
+     * Loads the filter-enabled config instead of the default server test config.
+     */
+    @Override
+    protected InputStream getTikaConfigInputStream() {
+        return getClass().getResourceAsStream("TIKA-3137-include.xml");
+    }
+
+    @Override
+    protected void setUpResources(JAXRSServerFactoryBean sf) {
+        sf.setResourceClasses(RecursiveMetadataResource.class);
+        sf.setResourceProvider(RecursiveMetadataResource.class,
+                new SingletonResourceProvider(new RecursiveMetadataResource()));
+    }
+
+    @Override
+    protected void setUpProviders(JAXRSServerFactoryBean sf) {
+        List<Object> providers = new ArrayList<>();
+        providers.add(new MetadataListMessageBodyWriter());
+        sf.setProviders(providers);
+    }
+
+    /**
+     * PUTs the test_recursive_embedded.docx resource to /rmeta and checks that
+     * every returned entry carries only the included field names and that no
+     * image/emf or text/plain entry is returned.
+     */
+    @Test
+    public void testBasicFilter() throws Exception {
+        List<Metadata> metadataList;
+        // close the request body and the response reader even if the request or JSON parsing throws
+        try (InputStream doc = ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC)) {
+            Response response = WebClient
+                    .create(endPoint + META_PATH)
+                    .accept("application/json")
+                    .acceptEncoding("gzip")
+                    .put(doc);
+
+            try (Reader reader = new InputStreamReader(
+                    new GzipCompressorInputStream((InputStream) response.getEntity()), UTF_8)) {
+                metadataList = JsonMetadataList.fromJson(reader);
+            }
+        }
+        assertEquals(5, metadataList.size());
+
+        Set<String> expectedKeys = new HashSet<>();
+        expectedKeys.add("X-TIKA:content");
+        expectedKeys.add("extended-properties:Application");
+        expectedKeys.add("Content-Type");
+        for (Metadata m : metadataList) {
+            // fail with a readable message rather than an NPE if an entry has no Content-Type
+            String contentType = m.get(Metadata.CONTENT_TYPE);
+            assertNotNull("missing Content-Type", contentType);
+            if (contentType.equals("image/emf")) {
+                fail("emf should have been filtered out");
+            }
+            if (contentType.startsWith("text/plain")) {
+                fail("text/plain should have been filtered out");
+            }
+            assertTrue(m.names().length >= 2);
+            for (String n : m.names()) {
+                if (! expectedKeys.contains(n)) {
+                    fail("didn't expect "+n);
+                }
+            }
+        }
+    }
+}
diff --git a/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml b/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml
new file mode 100644
index 0000000..765bc11
--- /dev/null
+++ b/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties> <!-- loaded by RecursiveMetadataFilterTest, which expects only the three included fields and no image/emf or text/plain entries in /rmeta output -->
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">X-TIKA:content,extended-properties:Application,Content-Type</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
+      <params>
+        <param name="mimes" type="string">image/emf,text/plain</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>