You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/07/16 19:58:23 UTC

[tika] branch TIKA-3137 created (now 3bdcd97)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3137
in repository https://gitbox.apache.org/repos/asf/tika.git.


      at 3bdcd97  TIKA-3137 -- first pass, need to add unit tests for tika-batch

This branch includes the following new commits:

     new 3bdcd97  TIKA-3137 -- first pass, need to add unit tests for tika-batch

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tika] 01/01: TIKA-3137 -- first pass, need to add unit tests for tika-batch

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3137
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 3bdcd9714b8259c7c8c2ae1d5837148016d5bdaf
Author: tallison <ta...@apache.org>
AuthorDate: Thu Jul 16 15:58:00 2020 -0400

    TIKA-3137 -- first pass, need to add unit tests for tika-batch
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   4 +-
 .../batch/fs/RecursiveParserWrapperFSConsumer.java |   9 +-
 .../tika/batch/fs/StreamOutRPWFSConsumer.java      |  20 ++-
 .../fs/builders/BasicTikaFSConsumersBuilder.java   |  11 +-
 .../RecursiveParserWrapperFSConsumerTest.java      |   5 +-
 .../java/org/apache/tika/config/TikaConfig.java    | 108 ++++++++++++-
 .../metadata/filter/ClearByMimeMetadataFilter.java |  74 +++++++++
 .../metadata/filter/CompositeMetadataFilter.java   |  38 +++++
 .../metadata/filter/DefaultMetadataFilter.java     |  46 ++++++
 .../filter/ExcludeFieldMetadataFilter.java         |  53 +++++++
 .../filter/IncludeFieldMetadataFilter.java         |  58 +++++++
 .../tika/metadata/filter/MetadataFilter.java       |  33 ++++
 .../apache/tika/metadata/filter/NoOpFilter.java    |  34 +++++
 .../tika/sax/RecursiveParserWrapperHandler.java    |  31 +++-
 .../org.apache.tika.metadata.filter.MetadataFilter |  16 ++
 .../org/apache/tika/config/TikaConfigTest.java     |   2 +
 .../tika/metadata/filter/MockUpperCaseFilter.java  |  39 +++++
 .../tika/metadata/filter/TestMetadataFilter.java   | 170 +++++++++++++++++++++
 .../org/apache/tika/config/TIKA-3137-exclude.xml   |  26 ++++
 .../apache/tika/config/TIKA-3137-include-uc.xml    |  27 ++++
 .../org/apache/tika/config/TIKA-3137-include.xml   |  26 ++++
 .../org/apache/tika/config/TIKA-3137-mimes-uc.xml  |  27 ++++
 .../tika/parser/RecursiveParserWrapperTest.java    |  43 ++++++
 .../org/apache/tika/parser/TIKA-3137-include.xml   |  31 ++++
 .../server/resource/RecursiveMetadataResource.java |   3 +-
 .../java/org/apache/tika/server/CXFTestBase.java   |   7 +-
 .../tika/server/RecursiveMetadataFilterTest.java   | 107 +++++++++++++
 .../org/apache/tika/server/TIKA-3137-include.xml   |  31 ++++
 28 files changed, 1062 insertions(+), 17 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 60cb05e..7730eda 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -508,7 +508,9 @@ public class TikaCLI {
     private void handleRecursiveJson(URL url, OutputStream output) throws IOException, SAXException, TikaException {
         Metadata metadata = new Metadata();
         RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser);
-        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(getContentHandlerFactory(type), -1);
+        RecursiveParserWrapperHandler handler =
+                new RecursiveParserWrapperHandler(getContentHandlerFactory(type),
+                        -1, config.getMetadataFilter());
         try (InputStream input = TikaInputStream.get(url, metadata)) {
             wrapper.parse(input, handler, metadata, context);
         }
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
index 56b8b58..9732781 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/RecursiveParserWrapperFSConsumer.java
@@ -32,6 +32,8 @@ import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.metadata.serialization.JsonMetadataList;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -50,6 +52,7 @@ public class RecursiveParserWrapperFSConsumer extends AbstractFSConsumer {
     private final Parser parser;
     private final ContentHandlerFactory contentHandlerFactory;
     private final OutputStreamFactory fsOSFactory;
+    private final MetadataFilter metadataFilter;
     private String outputEncoding = "UTF-8";
 
     /**
@@ -62,11 +65,12 @@ public class RecursiveParserWrapperFSConsumer extends AbstractFSConsumer {
     public RecursiveParserWrapperFSConsumer(ArrayBlockingQueue<FileResource> queue,
                                             Parser parser,
                                             ContentHandlerFactory contentHandlerFactory,
-                                            OutputStreamFactory fsOSFactory) {
+                                            OutputStreamFactory fsOSFactory, MetadataFilter metadataFilter) {
         super(queue);
         this.contentHandlerFactory = contentHandlerFactory;
         this.fsOSFactory = fsOSFactory;
         this.parser = parser;
+        this.metadataFilter = metadataFilter;
     }
 
     @Override
@@ -95,7 +99,8 @@ public class RecursiveParserWrapperFSConsumer extends AbstractFSConsumer {
         Throwable thrown = null;
         List<Metadata> metadataList = null;
         Metadata containerMetadata = fileResource.getMetadata();
-        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory, -1);
+        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory,
+                -1, metadataFilter);
         try {
             parse(fileResource.getResourceId(), parser, is, handler,
                     containerMetadata, context);
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java
index 018c1a9..dd39a6c 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/StreamOutRPWFSConsumer.java
@@ -20,12 +20,15 @@ package org.apache.tika.batch.fs;
 
 
 import org.apache.commons.io.IOUtils;
+import org.apache.tika.Tika;
 import org.apache.tika.batch.FileResource;
 import org.apache.tika.batch.OutputStreamFactory;
 import org.apache.tika.batch.ParserFactory;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.metadata.serialization.JsonStreamingSerializer;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -53,17 +56,19 @@ public class StreamOutRPWFSConsumer extends AbstractFSConsumer {
     private final Parser parser;
     private final ContentHandlerFactory contentHandlerFactory;
     private final OutputStreamFactory fsOSFactory;
+    private final MetadataFilter metadataFilter;
     private String outputEncoding = "UTF-8";
 
 
     public StreamOutRPWFSConsumer(ArrayBlockingQueue<FileResource> queue,
                                   Parser parser,
                                   ContentHandlerFactory contentHandlerFactory,
-                                  OutputStreamFactory fsOSFactory) {
+                                  OutputStreamFactory fsOSFactory, MetadataFilter metadataFilter) {
         super(queue);
         this.contentHandlerFactory = contentHandlerFactory;
         this.fsOSFactory = fsOSFactory;
         this.parser = parser;
+        this.metadataFilter = metadataFilter;
     }
 
     @Override
@@ -93,7 +98,8 @@ public class StreamOutRPWFSConsumer extends AbstractFSConsumer {
         JsonStreamingSerializer writer = new JsonStreamingSerializer(
                 new OutputStreamWriter(os, StandardCharsets.UTF_8));
 
-        WriteoutRPWHandler handler = new WriteoutRPWHandler(contentHandlerFactory, writer);
+        WriteoutRPWHandler handler = new WriteoutRPWHandler(contentHandlerFactory,
+                writer, metadataFilter);
         Throwable thrown = null;
         try {
             parse(fileResource.getResourceId(), parser, is, handler,
@@ -137,16 +143,24 @@ public class StreamOutRPWFSConsumer extends AbstractFSConsumer {
     //be written straight to disk.
     private class WriteoutRPWHandler extends AbstractRecursiveParserWrapperHandler {
         private final JsonStreamingSerializer jsonWriter;
+        private final MetadataFilter metadataFilter;
 
-        public WriteoutRPWHandler(ContentHandlerFactory contentHandlerFactory, JsonStreamingSerializer writer) {
+        public WriteoutRPWHandler(ContentHandlerFactory contentHandlerFactory, JsonStreamingSerializer writer,
+                                  MetadataFilter metadataFilter) {
             super(contentHandlerFactory);
             this.jsonWriter = writer;
+            this.metadataFilter = metadataFilter;
         }
 
         @Override
         public void endEmbeddedDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
             metadata.add(RecursiveParserWrapperHandler.TIKA_CONTENT, contentHandler.toString());
             try {
+                metadataFilter.filter(metadata);
+            } catch (TikaException e) {
+                throw new SAXException(e);
+            }
+            try {
                 jsonWriter.add(metadata);
             } catch (IOException e) {
                 throw new SAXException(e);
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
index 88171ee..4f05324 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/builders/BasicTikaFSConsumersBuilder.java
@@ -42,6 +42,9 @@ import org.apache.tika.batch.fs.FSUtil;
 import org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer;
 import org.apache.tika.batch.fs.StreamOutRPWFSConsumer;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BasicContentHandlerFactory;
@@ -145,15 +148,19 @@ public class BasicTikaFSConsumersBuilder extends AbstractConsumersBuilder {
                 contentHandlerFactory, recursiveParserWrapper);
         Parser parser = parserFactory.getParser(config);
         if (recursiveParserWrapper) {
+            MetadataFilter metadataFilter = config.getMetadataFilter();
             parser = new RecursiveParserWrapper(parser);
+
             for (int i = 0; i < numConsumers; i++) {
                 FileResourceConsumer c = null;
                 if (streamOut){
                     c = new StreamOutRPWFSConsumer(queue,
-                            parser, contentHandlerFactory, outputStreamFactory);
+                            parser, contentHandlerFactory,
+                            outputStreamFactory, metadataFilter);
                 } else {
                     c = new RecursiveParserWrapperFSConsumer(queue,
-                            parser, contentHandlerFactory, outputStreamFactory);
+                            parser, contentHandlerFactory,
+                            outputStreamFactory, metadataFilter);
                 }
                 consumers.add(c);
             }
diff --git a/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java b/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
index 028acf5..680df42 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
@@ -34,6 +34,7 @@ import org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.metadata.serialization.JsonMetadataList;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
@@ -75,7 +76,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
         Parser p = new RecursiveParserWrapper(new AutoDetectParserFactory().getParser(new TikaConfig()));
         RecursiveParserWrapperFSConsumer consumer = new RecursiveParserWrapperFSConsumer(
                 queue, p, new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                mockOSFactory);
+                mockOSFactory, NoOpFilter.NOOP_FILTER);
 
         IFileProcessorFutureResult result = consumer.call();
         mockOSFactory.getStreams().get(0).flush();
@@ -123,7 +124,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
         Parser p = new RecursiveParserWrapper(new AutoDetectParserFactory().getParser(new TikaConfig()));
         RecursiveParserWrapperFSConsumer consumer = new RecursiveParserWrapperFSConsumer(
                 queue, p, new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
-                mockOSFactory);
+                mockOSFactory, NoOpFilter.NOOP_FILTER);
 
         IFileProcessorFutureResult result = consumer.call();
         mockOSFactory.getStreams().get(0).flush();
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 2aabda4..b1aaa24 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -50,6 +50,9 @@ import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.language.translate.DefaultTranslator;
 import org.apache.tika.language.translate.Translator;
+import org.apache.tika.metadata.filter.CompositeMetadataFilter;
+import org.apache.tika.metadata.filter.DefaultMetadataFilter;
+import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeTypeException;
@@ -105,6 +108,10 @@ public class TikaConfig {
         return new SimpleThreadPoolExecutor();
     }
 
+    private static MetadataFilter getDefaultMetadataFilter(ServiceLoader loader) {
+        return new DefaultMetadataFilter(loader);
+    }
+
     //use this to look for unneeded instantiations of TikaConfig
     protected static AtomicInteger TIMES_INSTANTIATED = new AtomicInteger();
 
@@ -116,6 +123,7 @@ public class TikaConfig {
     private final MimeTypes mimeTypes;
     private final ExecutorService executorService;
     private final EncodingDetector encodingDetector;
+    private final MetadataFilter metadataFilter;
 
     public TikaConfig(String file)
             throws TikaException, IOException, SAXException {
@@ -181,6 +189,7 @@ public class TikaConfig {
         TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
         EncodingDetectorXmlLoader encodingDetectorXmlLoader = new EncodingDetectorXmlLoader();
+        MetadataFilterXmlLoader metadataFilterXmlLoader = new MetadataFilterXmlLoader();
         updateXMLReaderUtils(element);
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
@@ -190,6 +199,7 @@ public class TikaConfig {
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
         this.executorService = executorLoader.loadOverall(element, mimeTypes, loader);
+        this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, loader);
         this.serviceLoader = loader;
         TIMES_INSTANTIATED.incrementAndGet();
     }
@@ -215,6 +225,7 @@ public class TikaConfig {
         this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
         this.translator = getDefaultTranslator(serviceLoader);
         this.executorService = getDefaultExecutorService();
+        this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
         TIMES_INSTANTIATED.incrementAndGet();
     }
 
@@ -250,6 +261,7 @@ public class TikaConfig {
             this.detector = getDefaultDetector(mimeTypes, serviceLoader);
             this.translator = getDefaultTranslator(serviceLoader);
             this.executorService = getDefaultExecutorService();
+            this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
         } else {
             ServiceLoader tmpServiceLoader = new ServiceLoader();
             try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) {
@@ -260,7 +272,8 @@ public class TikaConfig {
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
                 TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
                 ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
-                
+                MetadataFilterXmlLoader metadataFilterXmlLoader = new MetadataFilterXmlLoader();
+
                 this.mimeTypes = typesFromDomElement(element);
                 this.encodingDetector = encodingDetectorLoader.loadOverall(element, mimeTypes, serviceLoader);
 
@@ -270,6 +283,7 @@ public class TikaConfig {
                 this.detector = detectorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.executorService = executorLoader.loadOverall(element, mimeTypes, serviceLoader);
+                this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, serviceLoader);
             } catch (SAXException e) {
                 throw new TikaException(
                         "Specified Tika configuration has syntax errors: "
@@ -394,6 +408,9 @@ public class TikaConfig {
         return serviceLoader;
     }
 
+    public MetadataFilter getMetadataFilter() {
+        return metadataFilter;
+    }
     /**
      * Provides a default configuration (TikaConfig).  Currently creates a
      * new instance each time it's called; we may be able to have it
@@ -1111,7 +1128,8 @@ public class TikaConfig {
         }
 
         @Override
-        CompositeEncodingDetector createComposite(List<EncodingDetector> encodingDetectors, MimeTypes mimeTypes, ServiceLoader loader) {
+        CompositeEncodingDetector createComposite(List<EncodingDetector> encodingDetectors,
+                                                  MimeTypes mimeTypes, ServiceLoader loader) {
             return new CompositeEncodingDetector(encodingDetectors);
         }
 
@@ -1152,5 +1170,91 @@ public class TikaConfig {
         }
     }
 
+    /**
+     * XmlLoader that TikaConfig uses to build its {@link MetadataFilter} from the XML config.
+     */
+    private static class MetadataFilterXmlLoader extends
+            XmlLoader<MetadataFilter, MetadataFilter> {
+
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "metadataFilters";
+        }
+
+        String getLoaderTagName() {
+            return "metadataFilter";
+        }
+
+        @Override
+        Class<? extends MetadataFilter> getLoaderClass() {
+            return MetadataFilter.class;
+        }
+
+        @Override
+        boolean isComposite(MetadataFilter loaded) {
+            return loaded instanceof CompositeMetadataFilter;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends MetadataFilter> loadedClass) {
+            return CompositeMetadataFilter.class.isAssignableFrom(loadedClass);
+        }
+
+        @Override
+        MetadataFilter preLoadOne(Class<? extends MetadataFilter> loadedClass,
+                                    String classname, MimeTypes mimeTypes) throws TikaException {
+            // Check for classes which can't be set in config
+            // Continue with normal loading
+            return null;
+        }
+
+        @Override
+        MetadataFilter createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultMetadataFilter(loader);
+        }
+
+        //this ignores the service loader
+        @Override
+        MetadataFilter createComposite(List<MetadataFilter> loaded, MimeTypes mimeTypes, ServiceLoader loader) {
+            return new DefaultMetadataFilter(loaded);
+        }
+
+        @Override
+        MetadataFilter createComposite(Class<? extends MetadataFilter> metadataFilterClass,
+                                         List<MetadataFilter> childMetadataFilters,
+                                         Set<Class<? extends MetadataFilter>> excludeFilters,
+                                         Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException,
+                InstantiationException {
+            MetadataFilter metadataFilter = null;
+            Constructor<? extends MetadataFilter> c;
+
+            // Try the composite constructors in turn: (ServiceLoader, Collection), then (List)
+            try {
+                c = metadataFilterClass.getConstructor(ServiceLoader.class, Collection.class);
+                metadataFilter = c.newInstance(loader, excludeFilters);
+            } catch (NoSuchMethodException ignored) {
+                // expected for classes without this constructor: fall through to the next one
+            }
+            if (metadataFilter == null) {
+                try {
+                    c = metadataFilterClass.getConstructor(List.class);
+                    metadataFilter = c.newInstance(childMetadataFilters);
+                } catch (NoSuchMethodException ignored) {
+                    // neither constructor exists: null is returned to the caller
+                }
+            }
+
+            return metadataFilter;
+        }
+
+        @Override
+        MetadataFilter decorate(MetadataFilter created, Element element) {
+            return created; // No decoration of MetadataFilters
+        }
+    }
 
 }
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
new file mode 100644
index 0000000..05324f2
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * This class clears the entire metadata object if the
+ * mime matches the mime filter.  The idea is that you might not want
+ * to store/transmit metadata for images or specific file types.
+ */
+public class ClearByMimeMetadataFilter implements MetadataFilter {
+    private final Set<String> mimes;
+
+    public ClearByMimeMetadataFilter() {
+        this(new HashSet<>());
+    }
+
+    public ClearByMimeMetadataFilter(Set<String> mimes) {
+        this.mimes = mimes;
+    }
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        String mimeString = metadata.get(Metadata.CONTENT_TYPE);
+        if (mimeString == null) {
+            return;
+        }
+        MediaType mt = MediaType.parse(mimeString);
+        if (mt != null) {
+            mimeString = mt.getBaseType().toString();
+        }
+        if (mimes.contains(mimeString)) {
+            for (String n : metadata.names()) {
+                metadata.remove(n);
+            }
+        }
+    }
+
+    /**
+     * @param mimesString comma-delimited list of mimes that will trigger complete removal of metadata; entries are trimmed and blank entries are ignored
+     */
+    @Field
+    public void setMimes(String mimesString) {
+        for (String mime : mimesString.split(",")) {
+            if (!mime.trim().isEmpty()) {
+                mimes.add(mime.trim());
+            }
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
new file mode 100644
index 0000000..4d592c9
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.List;
+
+/** Runs a list of {@link MetadataFilter}s, in list order, against the same metadata object. */
+public class CompositeMetadataFilter implements MetadataFilter {
+    private final List<MetadataFilter> filters;
+
+    public CompositeMetadataFilter(List<MetadataFilter> filters) {
+        this.filters = filters;
+    }
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (MetadataFilter delegate : filters) {
+            delegate.filter(metadata);
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/DefaultMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/DefaultMetadataFilter.java
new file mode 100644
index 0000000..7671f50
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/DefaultMetadataFilter.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+import java.util.List;
+
+/** Composite filter that can populate itself from statically registered MetadataFilter services. */
+public class DefaultMetadataFilter extends CompositeMetadataFilter {
+
+    private static List<MetadataFilter> getDefaultFilters(ServiceLoader loader) {
+        List<MetadataFilter> filters = loader.loadStaticServiceProviders(MetadataFilter.class);
+        // the helper's result is not used, so the list is presumably sorted in place
+        ServiceLoaderUtils.sortLoadedClasses(filters);
+        return filters;
+    }
+
+    public DefaultMetadataFilter() {
+        this(new ServiceLoader());
+    }
+
+    public DefaultMetadataFilter(ServiceLoader serviceLoader) {
+        super(getDefaultFilters(serviceLoader));
+    }
+
+    public DefaultMetadataFilter(List<MetadataFilter> metadataFilters) {
+        super(metadataFilters);
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
new file mode 100644
index 0000000..3b6e2a0
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class ExcludeFieldMetadataFilter implements MetadataFilter {
+    private final Set<String> exclude;
+
+    public ExcludeFieldMetadataFilter() {
+        this(new HashSet<>());
+    }
+    public ExcludeFieldMetadataFilter(Set<String> exclude) {
+        this.exclude = exclude;
+    }
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (String field : exclude) {
+            metadata.remove(field);
+        }
+    }
+
+    /**
+     * Adds field names to the exclude set; whitespace around each name is trimmed.
+     * @param excludeString comma-delimited list of fields to exclude
+     */
+    @Field
+    public void setExclude(String excludeString) {
+        for (String name : excludeString.split(",")) {
+            exclude.add(name.trim());
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
new file mode 100644
index 0000000..4bc6c9e
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/** Metadata filter that removes every field whose name is not in the include set. */
+public class IncludeFieldMetadataFilter implements MetadataFilter {
+    private final Set<String> includeSet;
+
+    public IncludeFieldMetadataFilter() { this(new HashSet<>()); }
+
+    public IncludeFieldMetadataFilter(Set<String> fields) {
+        // Copy so that setInclude() never mutates (or fails on) the caller's set.
+        this.includeSet = new HashSet<>(fields);
+    }
+
+    /**
+     * Adds field names to the include set; whitespace around each name is dropped.
+     * @param includeString comma-delimited list of fields to include
+     */
+    @Field
+    public void setInclude(String includeString) {
+        for (String include : includeString.split(",")) {
+            includeSet.add(include.trim());
+        }
+    }
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        // names() returns an array, so removing entries while looping over it is safe.
+        for (String n : metadata.names()) {
+            if (! includeSet.contains(n)) {
+                metadata.remove(n);
+            }
+        }
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
new file mode 100644
index 0000000..7a8f345
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.Serializable;
+
+/**
+ * Filters the metadata in place
+ *
+ * @since Apache Tika 1.25
+ */
+public interface MetadataFilter extends Serializable {
+    /** Applies this filter to {@code metadata}, modifying that object directly. */
+    void filter(Metadata metadata) throws TikaException;
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
new file mode 100644
index 0000000..9cd1ec3
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * This filter performs no operations on the metadata
+ * and leaves it untouched.
+ */
+public class NoOpFilter implements MetadataFilter {
+    /** Shared instance; the class has no state, and the field is final so it cannot be reassigned. */
+    public static final NoOpFilter NOOP_FILTER = new NoOpFilter();
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        //no op
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
index 408598f..50f0fb8 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
@@ -16,7 +16,10 @@
  */
 package org.apache.tika.sax;
 
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.utils.ParserUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -40,12 +43,13 @@ import java.util.List;
 public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrapperHandler {
 
     protected final List<Metadata> metadataList = new LinkedList<>();
+    private final MetadataFilter metadataFilter;
 
     /**
      * Create a handler with no limit on the number of embedded resources
      */
     public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory) {
-        super(contentHandlerFactory);
+        this(contentHandlerFactory, -1, NoOpFilter.NOOP_FILTER);
     }
 
     /**
@@ -54,7 +58,13 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
      * @param maxEmbeddedResources number of embedded resources that will be parsed
      */
     public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory, int maxEmbeddedResources) {
+        this(contentHandlerFactory, maxEmbeddedResources, NoOpFilter.NOOP_FILTER);
+    }
+
+    public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory, int maxEmbeddedResources,
+                                         MetadataFilter metadataFilter) {
         super(contentHandlerFactory, maxEmbeddedResources);
+        this.metadataFilter = metadataFilter;
     }
 
     /**
@@ -79,7 +89,15 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
     public void endEmbeddedDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
         super.endEmbeddedDocument(contentHandler, metadata);
         addContent(contentHandler, metadata);
-        metadataList.add(ParserUtils.cloneMetadata(metadata));
+        try {
+            metadataFilter.filter(metadata);
+        } catch (TikaException e) {
+            throw new SAXException(e);
+        }
+
+        if (metadata.size() > 0) {
+            metadataList.add(ParserUtils.cloneMetadata(metadata));
+        }
     }
 
     /**
@@ -92,8 +110,15 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
     public void endDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
         super.endDocument(contentHandler, metadata);
         addContent(contentHandler, metadata);
+        try {
+            metadataFilter.filter(metadata);
+        } catch (TikaException e) {
+            throw new SAXException(e);
+        }
 
-        metadataList.add(0, ParserUtils.cloneMetadata(metadata));
+        if (metadata.size() > 0) {
+            metadataList.add(0, ParserUtils.cloneMetadata(metadata));
+        }
     }
 
     /**
diff --git a/tika-core/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter b/tika-core/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
new file mode 100644
index 0000000..604a480
--- /dev/null
+++ b/tika-core/src/main/resources/META-INF/services/org.apache.tika.metadata.filter.MetadataFilter
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.metadata.filter.NoOpFilter
\ No newline at end of file
diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
index 3d67eb1..d26b936 100644
--- a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
+++ b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
@@ -349,4 +349,6 @@ public class TikaConfigTest extends AbstractTikaConfigTest {
         getConfig("TIKA-2732-xmlreaderutils-exc.xml");
     }
 
+
+
 }
\ No newline at end of file
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
new file mode 100644
index 0000000..0632dd4
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.util.Locale;
+
+/**
+ * Test-only filter that replaces every metadata value with its upper-cased form
+ */
+public class MockUpperCaseFilter implements MetadataFilter {
+
+    @Override
+    public void filter(Metadata metadata) throws TikaException {
+        for (String name : metadata.names()) {
+            String[] originals = metadata.getValues(name);
+            metadata.remove(name); // clear first so the add() calls below do not append to the old values
+            for (String original : originals) {
+                metadata.add(name, original.toUpperCase(Locale.US));
+            }
+        }
+    }
+}
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
new file mode 100644
index 0000000..e933d0c
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.metadata.filter;
+
+import org.apache.tika.config.AbstractTikaConfigTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.junit.Test;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+public class TestMetadataFilter extends AbstractTikaConfigTest {
+
+    @Test
+    public void testDefault() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+
+        MetadataFilter defaultFilter = new DefaultMetadataFilter();
+        defaultFilter.filter(metadata);
+
+        assertEquals(2, metadata.names().length);
+        assertEquals("title", metadata.get("title"));
+        assertEquals("author", metadata.get("author"));
+    }
+
+    @Test
+    public void testIncludeFilter() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+
+        MetadataFilter filter = new IncludeFieldMetadataFilter(set("title"));
+        filter.filter(metadata);
+        assertEquals(1, metadata.names().length);
+        assertEquals("title", metadata.get("title"));
+        assertNull(metadata.get("author"));
+    }
+
+    @Test
+    public void testExcludeFilter() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+
+        MetadataFilter filter = new ExcludeFieldMetadataFilter(set("title"));
+        filter.filter(metadata);
+        assertEquals(1, metadata.names().length);
+        assertEquals("author", metadata.get("author"));
+        assertNull(metadata.get("title"));
+    }
+
+    @Test
+    public void testConfigIncludeFilter() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-include.xml");
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+        metadata.set("content", "content");
+
+        config.getMetadataFilter().filter(metadata);
+
+        assertEquals(2, metadata.size());
+        assertEquals("title", metadata.get("title"));
+        assertEquals("author", metadata.get("author"));
+    }
+
+    @Test
+    public void testConfigExcludeFilter() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-exclude.xml");
+        Metadata metadata = new Metadata();
+        metadata.set("title", "title");
+        metadata.set("author", "author");
+        metadata.set("content", "content");
+
+        config.getMetadataFilter().filter(metadata);
+
+        assertEquals(1, metadata.size());
+        assertEquals("content", metadata.get("content"));
+    }
+
+    @Test
+    public void testConfigIncludeAndUCFilter() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-include-uc.xml");
+        String[] expectedTitles = new String[]{
+                "TITLE1", "TITLE2", "TITLE3"
+        };
+        Metadata metadata = new Metadata();
+        metadata.add("title", "title1");
+        metadata.add("title", "title2");
+        metadata.add("title", "title3");
+        metadata.set("author", "author");
+        metadata.set("content", "content");
+
+        config.getMetadataFilter().filter(metadata);
+
+        assertEquals(2, metadata.size());
+        assertArrayEquals(expectedTitles, metadata.getValues("title"));
+        assertEquals("AUTHOR", metadata.get("author"));
+    }
+
+    @Test
+    public void testMimeClearingFilter() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.image("jpeg").toString());
+        metadata.set("author", "author");
+
+        MetadataFilter filter = new ClearByMimeMetadataFilter(set("image/jpeg","application/pdf"));
+        filter.filter(metadata);
+        assertEquals(0, metadata.size());
+
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.text("plain").toString());
+        metadata.set("author", "author");
+        filter.filter(metadata);
+        assertEquals(2, metadata.size());
+        assertEquals("author", metadata.get("author"));
+
+    }
+
+    @Test
+    public void testMimeClearingFilterConfig() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-mimes-uc.xml");
+
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.image("jpeg").toString());
+        metadata.set("author", "author");
+
+        MetadataFilter filter = config.getMetadataFilter();
+        filter.filter(metadata);
+        debug(metadata);
+        assertEquals(0, metadata.size());
+
+        metadata.set(Metadata.CONTENT_TYPE, MediaType.text("plain").toString());
+        metadata.set("author", "author");
+        filter.filter(metadata);
+        assertEquals(2, metadata.size());
+        assertEquals("AUTHOR", metadata.get("author"));
+
+    }
+
+    private static Set<String> set(String ... items) {
+        Set<String> set = new HashSet<>();
+        for (String item : items) {
+            set.add(item);
+        }
+        return set;
+    }
+}
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
new file mode 100644
index 0000000..27517f6
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter">
+      <params>
+        <param name="exclude" type="string">title,author</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
new file mode 100644
index 0000000..e0df476
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">title,author</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
+  </metadataFilters>
+</properties>
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
new file mode 100644
index 0000000..e92dff8
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">title,author</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
new file mode 100644
index 0000000..486280c
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
+      <params>
+        <param name="mimes" type="string">image/jpeg,application/pdf</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
+  </metadataFilters>
+</properties>
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 9a34103..6f43255 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -21,6 +21,7 @@ package org.apache.tika.parser;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -30,6 +31,7 @@ import java.util.Set;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.ClosedInputStream;
 import org.apache.tika.io.ProxyInputStream;
@@ -365,6 +367,47 @@ public class RecursiveParserWrapperTest extends TikaTest {
 
     }
 
+    @Test
+    public void testIncludeFilter() throws Exception {
+        //TIKA-3137
+        ParseContext context = new ParseContext();
+        Metadata metadata = new Metadata();
+        TikaConfig tikaConfig = new TikaConfig(getClass().getResourceAsStream("TIKA-3137-include.xml"));
+        Parser p = new AutoDetectParser(tikaConfig);
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p, true);
+        String path = "/test-documents/test_recursive_embedded.docx";
+        ContentHandlerFactory contentHandlerFactory =
+                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT,
+                        -1);
+
+        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory,
+                -1, tikaConfig.getMetadataFilter());
+        try (InputStream is = getClass().getResourceAsStream(path)) {
+            wrapper.parse(is, handler, metadata, context);
+        }
+        List<Metadata> metadataList = handler.getMetadataList();
+        assertEquals(5, metadataList.size());
+
+        Set<String> expectedKeys = new HashSet<>();
+        expectedKeys.add("X-TIKA:content");
+        expectedKeys.add("extended-properties:Application");
+        expectedKeys.add("Content-Type");
+        for (Metadata m : metadataList) {
+            if (m.get(Metadata.CONTENT_TYPE).equals("image/emf")) {
+                fail("emf should have been filtered out");
+            }
+            if (m.get(Metadata.CONTENT_TYPE).startsWith("text/plain")) {
+                fail("text/plain should have been filtered out");
+            }
+            assertTrue(m.names().length >= 2);
+            for (String n : m.names()) {
+                if (! expectedKeys.contains(n)) {
+                    fail("didn't expect "+n);
+                }
+            }
+        }
+    }
+
     private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory,
                                        boolean catchEmbeddedExceptions,
                                        DigestingParser.Digester digester) throws Exception {
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
new file mode 100644
index 0000000..765bc11
--- /dev/null
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params>
+        <param name="include" type="string">X-TIKA:content,extended-properties:Application,Content-Type</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
+      <params>
+        <param name="mimes" type="string">image/emf,text/plain</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index 07d20c5..71e7180 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -152,7 +152,8 @@ public class RecursiveMetadataResource {
         BasicContentHandlerFactory.HANDLER_TYPE type =
                 BasicContentHandlerFactory.parseHandlerType(handlerTypeName, DEFAULT_HANDLER_TYPE);
 		RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(
-		        new BasicContentHandlerFactory(type, writeLimit), maxEmbeddedResources);
+		        new BasicContentHandlerFactory(type, writeLimit), maxEmbeddedResources,
+                TikaResource.getConfig().getMetadataFilter());
 		try {
             TikaResource.parse(wrapper, LOG, info.getPath(), is, handler, metadata, context);
         } catch (SecurityException e) {
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
index 92c9d34..8b5f153 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
@@ -90,7 +90,8 @@ public abstract class CXFTestBase {
 
     @Before
     public void setUp() throws Exception {
-        this.tika = new TikaConfig(getClass().getResourceAsStream("tika-config-for-server-tests.xml"));
+
+        this.tika = new TikaConfig(getTikaConfigInputStream());
         TikaResource.init(tika,
                 new CommonsDigester(DIGESTER_READ_LIMIT, "md5,sha1:32"),
                 new DefaultInputStreamFactory(), new ServerStatus(true));
@@ -120,6 +121,10 @@ public abstract class CXFTestBase {
         server = sf.create();
     }
 
+    protected InputStream getTikaConfigInputStream() {
+        return getClass().getResourceAsStream("tika-config-for-server-tests.xml");
+    }
+
     /**
      * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)}
      * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)}
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java
new file mode 100644
index 0000000..748ee77
--- /dev/null
+++ b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataFilterTest.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
+import org.apache.tika.sax.RecursiveParserWrapperHandler;
+import org.apache.tika.server.resource.RecursiveMetadataResource;
+import org.apache.tika.server.writer.MetadataListMessageBodyWriter;
+import org.junit.Test;
+
+import javax.ws.rs.core.Response;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.tika.TikaTest.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/** Verifies that /rmeta applies the metadata filters declared in TIKA-3137-include.xml. */
+public class RecursiveMetadataFilterTest extends CXFTestBase {
+
+    private static final String META_PATH = "/rmeta";
+    private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
+
+    /** Supplies the filter-declaring config in place of the default server test config. */
+    @Override
+    protected InputStream getTikaConfigInputStream() {
+        return getClass().getResourceAsStream("TIKA-3137-include.xml");
+    }
+
+    @Override
+    protected void setUpResources(JAXRSServerFactoryBean sf) {
+        sf.setResourceClasses(RecursiveMetadataResource.class);
+        sf.setResourceProvider(RecursiveMetadataResource.class,
+                new SingletonResourceProvider(new RecursiveMetadataResource()));
+    }
+
+    @Override
+    protected void setUpProviders(JAXRSServerFactoryBean sf) {
+        List<Object> providers = new ArrayList<>();
+        providers.add(new MetadataListMessageBodyWriter());
+        sf.setProviders(providers);
+    }
+
+    /** Expects five metadata objects, none of a cleared mime type, carrying only included keys. */
+    @Test
+    public void testBasicFilter() throws Exception {
+        Response response = WebClient.create(endPoint + META_PATH)
+                .accept("application/json").acceptEncoding("gzip")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+        List<Metadata> metadataList;
+        try (Reader reader = new InputStreamReader(  // closed even if parsing throws
+                new GzipCompressorInputStream((InputStream) response.getEntity()), UTF_8)) {
+            metadataList = JsonMetadataList.fromJson(reader);
+        }
+        assertEquals(5, metadataList.size());
+
+        Set<String> expectedKeys = new HashSet<>();
+        expectedKeys.add("X-TIKA:content");
+        expectedKeys.add("extended-properties:Application");
+        expectedKeys.add("Content-Type");
+        for (Metadata m : metadataList) {
+            String contentType = m.get(Metadata.CONTENT_TYPE);
+            // a missing Content-Type must fail with a message, not a NullPointerException
+            assertNotNull("Content-Type should survive the include filter", contentType);
+            assertTrue("emf should have been filtered out", !contentType.equals("image/emf"));
+            assertTrue("text/plain should have been filtered out", !contentType.startsWith("text/plain"));
+            assertTrue(m.names().length >= 2);
+            for (String n : m.names()) {
+                assertTrue("didn't expect " + n, expectedKeys.contains(n));
+            }
+        }
+    }
+}
diff --git a/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml b/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml
new file mode 100644
index 0000000..765bc11
--- /dev/null
+++ b/tika-server/src/test/resources/org/apache/tika/server/TIKA-3137-include.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <metadataFilters> <!-- config loaded by RecursiveMetadataFilterTest via getTikaConfigInputStream() -->
+    <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+      <params> <!-- the test fails on any metadata key outside this list -->
+        <param name="include" type="string">X-TIKA:content,extended-properties:Application,Content-Type</param>
+      </params>
+    </metadataFilter>
+    <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
+      <params> <!-- the test fails if an object typed image/emf or text/plain* is returned -->
+        <param name="mimes" type="string">image/emf,text/plain</param>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+</properties>