You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/04/03 13:58:48 UTC

[tika] 02/02: TIKA-2572 -- review overly broad catches

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit b329820804bb74cc290337041eee595395e77435
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 3 09:58:19 2020 -0400

    TIKA-2572 -- review overly broad catches
---
 .../src/main/java/org/apache/tika/detect/AutoDetectReader.java |  2 +-
 .../src/main/java/org/apache/tika/detect/XmlRootExtractor.java |  2 ++
 .../src/main/java/org/apache/tika/parser/CompositeParser.java  |  9 ++++++---
 .../src/main/java/org/apache/tika/utils/CharsetUtils.java      |  9 +++++----
 .../src/main/java/org/apache/tika/parser/crypto/TSDParser.java |  6 +++++-
 .../java/org/apache/tika/parser/html/HtmlEncodingDetector.java |  3 ++-
 .../java/org/apache/tika/parser/mbox/OutlookPSTParser.java     | 10 ++++------
 .../java/org/apache/tika/parser/microsoft/OfficeParser.java    |  5 ++---
 .../tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java    |  9 +++++----
 .../parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java    |  2 ++
 .../apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java    |  2 +-
 .../java/org/apache/tika/parser/ocr/TesseractOCRParser.java    |  4 +++-
 .../apache/tika/parser/pkg/StreamingZipContainerDetector.java  |  4 ++--
 .../main/java/org/apache/tika/parser/rtf/TextExtractor.java    |  2 +-
 .../java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java |  2 +-
 .../org/apache/tika/parser/txt/UniversalEncodingListener.java  |  2 +-
 16 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java b/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
index 44dce8e..ca23a17 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
@@ -76,7 +76,7 @@ public class AutoDetectReader extends BufferedReader {
             if (charset != null) {
                 try {
                     return CharsetUtils.forName(charset);
-                } catch (Exception e) {
+                } catch (IllegalArgumentException e) {
                     // ignore
                 }
             }
diff --git a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
index 20a0be5..38148a5 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
@@ -53,6 +53,8 @@ public class XmlRootExtractor {
             XMLReaderUtils.parseSAX(
                     new CloseShieldInputStream(stream),
                     new OfflineContentHandler(handler), EMPTY_CONTEXT);
+        } catch (SecurityException e) {
+            throw e;
         } catch (Exception ignore) {
         }
         return handler.rootElement;
diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index c5c95a6..bf10736 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -275,9 +275,9 @@ public class CompositeParser extends AbstractParser {
             ParserUtils.recordParserDetails(parser, metadata);
             try {
                 parser.parse(taggedStream, taggedHandler, metadata, context);
-            } catch (RuntimeException e) {
-                throw new TikaException(
-                        "Unexpected RuntimeException from " + parser, e);
+            } catch (SecurityException e) {
+                //rethrow security exceptions
+                throw e;
             } catch (IOException e) {
                 taggedStream.throwIfCauseOf(e);
                 throw new TikaException(
@@ -286,6 +286,9 @@ public class CompositeParser extends AbstractParser {
                 if (taggedHandler != null) taggedHandler.throwIfCauseOf(e);
                 throw new TikaException(
                         "TIKA-237: Illegal SAXException from " + parser, e);
+            } catch (RuntimeException e) {
+                throw new TikaException(
+                        "Unexpected RuntimeException from " + parser, e);
             }
         } finally {
             tmp.dispose();
diff --git a/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java b/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java
index 29e8782..a2931cf 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java
@@ -18,6 +18,7 @@ package org.apache.tika.utils;
 
 import static java.util.Locale.ENGLISH;
 
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
@@ -56,7 +57,7 @@ public class CharsetUtils {
                 for (String alias : charset.aliases()) {
                     COMMON_CHARSETS.put(alias.toLowerCase(ENGLISH), charset);
                 }
-            } catch (Exception e) {
+            } catch (IllegalArgumentException e) {
                 // ignore
             }
         }
@@ -139,7 +140,7 @@ public class CharsetUtils {
     public static String clean(String charsetName) {
         try {
             return forName(charsetName).name();
-        } catch (Exception e) {
+        } catch (IllegalArgumentException e) {
             return null;
         }
     }
@@ -194,8 +195,8 @@ public class CharsetUtils {
                 if (cs != null) {
                     return cs;
                 }
-            } catch (Exception e) {
-                // ignore
+            } catch (IllegalArgumentException|IllegalAccessException|InvocationTargetException e) {
+                //ignore
             }
         }
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
index 1107d7c..f841066 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
@@ -128,6 +128,8 @@ public class TSDParser extends AbstractParser {
                 tsdMetasList.add(tsdMetas);
             }
 
+        } catch (SecurityException e) {
+            throw e;
         } catch (Exception ex) {
             LOG.error("Error in TSDParser.buildMetas {}", ex.getMessage());
             tsdMetasList.clear();
@@ -167,6 +169,8 @@ public class TSDParser extends AbstractParser {
                     edx.parseEmbedded(is, handler, metadata, false);
                 }
 
+            } catch (SecurityException e) {
+                throw e;
             } catch (Exception ex) {
                 LOG.error("Error in TSDParser.parseTSDContent {}", ex.getMessage());
             } finally {
@@ -179,7 +183,7 @@ public class TSDParser extends AbstractParser {
         if (cmsTimeStampedDataParser != null) {
             try {
                 cmsTimeStampedDataParser.close();
-            } catch (Exception ex) {
+            } catch (IOException ex) {
                 LOG.error("Error in TSDParser.closeCMSParser {}", ex.getMessage());
             }
         }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java
index c4c5188..c86ba7e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.HashSet;
@@ -161,7 +162,7 @@ public class HtmlEncodingDetector implements EncodingDetector {
                 if (CharsetUtils.isSupported(candCharset)) {
                     try {
                         return CharsetUtils.forName(candCharset);
-                    } catch (Exception e) {
+                    } catch (IllegalArgumentException e) {
                         //ignore
                     }
                 }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
index 3f9ce98..d810265 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
@@ -93,12 +93,10 @@ public class OutlookPSTParser extends AbstractParser {
             if (isValid) {
                 parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
             }
+        } catch (TikaException e) {
+            throw e;
         } catch (Exception e) {
-            if(e instanceof TikaException) {
-                throw (TikaException) e;
-            }else {
-                throw new TikaException(e.getMessage(), e);
-            }
+            throw new TikaException(e.getMessage(), e);
         } finally {
             if (pstFile != null && pstFile.getFileHandle() != null) {
                 try {
@@ -267,7 +265,7 @@ public class OutlookPSTParser extends AbstractParser {
                 xhtml.endElement("div");
 
             } catch (Exception e) {
-                throw new TikaException("Unable to unpack document stream", e);
+                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, mailMetadata);
             }
         }
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index d38bcd4..391c912 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -317,10 +317,9 @@ public class OfficeParser extends AbstractOfficeParser {
         try {
             reader = new VBAMacroReader(fs);
             macros = reader.readMacros();
+        } catch (SecurityException e) {
+            throw e;
         } catch (Exception e) {
-            if (e instanceof SecurityException) {
-                throw e;
-            }
             Metadata m = new Metadata();
             m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
             m.set(Metadata.CONTENT_TYPE, "text/x-vbasic");
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index 1865518..4ecebfb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -183,8 +183,10 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
 
                 tStream.close();
             }
+        } catch (SecurityException e) {
+            throw e;
         } catch (Exception ex) {
-
+            //swallow
         }
     }
 
@@ -204,10 +206,9 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
                 for (PackageRelationship rel : source.getRelationships()) {
                     try {
                         handleEmbeddedPart(source, rel, handler, metadata, handledTarget);
+                    } catch (SAXException|SecurityException e) {
+                        throw e;
                     } catch (Exception e) {
-                        if (e instanceof SAXException) {
-                            throw e;
-                        }
                         EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
                     }
                 }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
index 0641a81..57bbfeb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
@@ -129,6 +129,8 @@ public class SXWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
         XWPFStylesShim styles = null;
         try {
             styles = loadStyles(documentPart);
+        } catch (SecurityException e) {
+            throw e;
         } catch (Exception e) {
             metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING,
                     ExceptionUtils.getStackTrace(e));
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
index 18b3d1a..c446e0a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
@@ -146,7 +146,7 @@ public class CoreNLPNERecogniser implements NERecogniser {
             }
 
         } catch (Exception e) {
-            LOG.debug(e.getMessage(), e);
+            LOG.warn(e.getMessage(), e);
         }
         return names;
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 52cbfb1..b2c4496 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -225,8 +225,10 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
             }
 
 
+        } catch (SecurityException e) {
+            throw e;
         } catch (Exception e) {
-
+            //swallow
         } finally {
             IOUtils.closeQuietly(tmp);
         }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java
index abfd29e..67eaea8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java
@@ -218,7 +218,7 @@ public class StreamingZipContainerDetector extends ZipContainerDetectorBase impl
         } catch (SecurityException e) {
             throw e;
         } catch (Exception e) {
-
+            //swallow
         }
         return relsHandler.rels;
     }
@@ -251,7 +251,7 @@ public class StreamingZipContainerDetector extends ZipContainerDetectorBase impl
         } catch (SecurityException e) {
             throw e;
         } catch (Exception e) {
-
+            //swallow
         }
         return contentTypeHandler.mediaType;
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
index dfc0956..1c6e4eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
@@ -341,7 +341,7 @@ final class TextExtractor {
     private static Charset getCharset(String name) {
         try {
             return CharsetUtils.forName(name);
-        } catch (Exception e) {
+        } catch (IllegalArgumentException e) {
             return ASCII;
         }
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java
index 4a2c56b..9777d2e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java
@@ -70,7 +70,7 @@ public class Icu4jEncodingDetector implements EncodingDetector {
         for (CharsetMatch match : detector.detectAll()) {
             try {
                 return CharsetUtils.forName(match.getName());
-            } catch (Exception e) {
+            } catch (IllegalArgumentException e) {
                 // ignore
             }
         }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java b/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java
index 5e215a9..179466d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java
@@ -75,7 +75,7 @@ class UniversalEncodingListener implements CharsetListener {
         }
         try {
             this.charset = CharsetUtils.forName(name);
-        } catch (Exception e) {
+        } catch (IllegalArgumentException e) {
             // ignore
         }
     }