You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/04/03 13:58:48 UTC
[tika] 02/02: TIKA-2572 -- review overly broad catches
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit b329820804bb74cc290337041eee595395e77435
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 3 09:58:19 2020 -0400
TIKA-2572 -- review overly broad catches
---
.../src/main/java/org/apache/tika/detect/AutoDetectReader.java | 2 +-
.../src/main/java/org/apache/tika/detect/XmlRootExtractor.java | 2 ++
.../src/main/java/org/apache/tika/parser/CompositeParser.java | 9 ++++++---
.../src/main/java/org/apache/tika/utils/CharsetUtils.java | 9 +++++----
.../src/main/java/org/apache/tika/parser/crypto/TSDParser.java | 6 +++++-
.../java/org/apache/tika/parser/html/HtmlEncodingDetector.java | 3 ++-
.../java/org/apache/tika/parser/mbox/OutlookPSTParser.java | 10 ++++------
.../java/org/apache/tika/parser/microsoft/OfficeParser.java | 5 ++---
.../tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java | 9 +++++----
.../parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java | 2 ++
.../apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java | 2 +-
.../java/org/apache/tika/parser/ocr/TesseractOCRParser.java | 4 +++-
.../apache/tika/parser/pkg/StreamingZipContainerDetector.java | 4 ++--
.../main/java/org/apache/tika/parser/rtf/TextExtractor.java | 2 +-
.../java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java | 2 +-
.../org/apache/tika/parser/txt/UniversalEncodingListener.java | 2 +-
16 files changed, 43 insertions(+), 30 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java b/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
index 44dce8e..ca23a17 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
@@ -76,7 +76,7 @@ public class AutoDetectReader extends BufferedReader {
if (charset != null) {
try {
return CharsetUtils.forName(charset);
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
// ignore
}
}
diff --git a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
index 20a0be5..38148a5 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
@@ -53,6 +53,8 @@ public class XmlRootExtractor {
XMLReaderUtils.parseSAX(
new CloseShieldInputStream(stream),
new OfflineContentHandler(handler), EMPTY_CONTEXT);
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception ignore) {
}
return handler.rootElement;
diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index c5c95a6..bf10736 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -275,9 +275,9 @@ public class CompositeParser extends AbstractParser {
ParserUtils.recordParserDetails(parser, metadata);
try {
parser.parse(taggedStream, taggedHandler, metadata, context);
- } catch (RuntimeException e) {
- throw new TikaException(
- "Unexpected RuntimeException from " + parser, e);
+ } catch (SecurityException e) {
+ //rethrow security exceptions
+ throw e;
} catch (IOException e) {
taggedStream.throwIfCauseOf(e);
throw new TikaException(
@@ -286,6 +286,9 @@ public class CompositeParser extends AbstractParser {
if (taggedHandler != null) taggedHandler.throwIfCauseOf(e);
throw new TikaException(
"TIKA-237: Illegal SAXException from " + parser, e);
+ } catch (RuntimeException e) {
+ throw new TikaException(
+ "Unexpected RuntimeException from " + parser, e);
}
} finally {
tmp.dispose();
diff --git a/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java b/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java
index 29e8782..a2931cf 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/CharsetUtils.java
@@ -18,6 +18,7 @@ package org.apache.tika.utils;
import static java.util.Locale.ENGLISH;
+import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
@@ -56,7 +57,7 @@ public class CharsetUtils {
for (String alias : charset.aliases()) {
COMMON_CHARSETS.put(alias.toLowerCase(ENGLISH), charset);
}
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
// ignore
}
}
@@ -139,7 +140,7 @@ public class CharsetUtils {
public static String clean(String charsetName) {
try {
return forName(charsetName).name();
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
return null;
}
}
@@ -194,8 +195,8 @@ public class CharsetUtils {
if (cs != null) {
return cs;
}
- } catch (Exception e) {
- // ignore
+ } catch (IllegalArgumentException|IllegalAccessException|InvocationTargetException e) {
+ //ignore
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
index 1107d7c..f841066 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
@@ -128,6 +128,8 @@ public class TSDParser extends AbstractParser {
tsdMetasList.add(tsdMetas);
}
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception ex) {
LOG.error("Error in TSDParser.buildMetas {}", ex.getMessage());
tsdMetasList.clear();
@@ -167,6 +169,8 @@ public class TSDParser extends AbstractParser {
edx.parseEmbedded(is, handler, metadata, false);
}
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception ex) {
LOG.error("Error in TSDParser.parseTSDContent {}", ex.getMessage());
} finally {
@@ -179,7 +183,7 @@ public class TSDParser extends AbstractParser {
if (cmsTimeStampedDataParser != null) {
try {
cmsTimeStampedDataParser.close();
- } catch (Exception ex) {
+ } catch (IOException ex) {
LOG.error("Error in TSDParser.closeCMSParser {}", ex.getMessage());
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java
index c4c5188..c86ba7e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlEncodingDetector.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashSet;
@@ -161,7 +162,7 @@ public class HtmlEncodingDetector implements EncodingDetector {
if (CharsetUtils.isSupported(candCharset)) {
try {
return CharsetUtils.forName(candCharset);
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
//ignore
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
index 3f9ce98..d810265 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
@@ -93,12 +93,10 @@ public class OutlookPSTParser extends AbstractParser {
if (isValid) {
parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
}
+ } catch (TikaException e) {
+ throw e;
} catch (Exception e) {
- if(e instanceof TikaException) {
- throw (TikaException) e;
- }else {
- throw new TikaException(e.getMessage(), e);
- }
+ throw new TikaException(e.getMessage(), e);
} finally {
if (pstFile != null && pstFile.getFileHandle() != null) {
try {
@@ -267,7 +265,7 @@ public class OutlookPSTParser extends AbstractParser {
xhtml.endElement("div");
} catch (Exception e) {
- throw new TikaException("Unable to unpack document stream", e);
+ EmbeddedDocumentUtil.recordEmbeddedStreamException(e, mailMetadata);
}
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index d38bcd4..391c912 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -317,10 +317,9 @@ public class OfficeParser extends AbstractOfficeParser {
try {
reader = new VBAMacroReader(fs);
macros = reader.readMacros();
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception e) {
- if (e instanceof SecurityException) {
- throw e;
- }
Metadata m = new Metadata();
m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
m.set(Metadata.CONTENT_TYPE, "text/x-vbasic");
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index 1865518..4ecebfb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -183,8 +183,10 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
tStream.close();
}
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception ex) {
-
+ //swallow
}
}
@@ -204,10 +206,9 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
for (PackageRelationship rel : source.getRelationships()) {
try {
handleEmbeddedPart(source, rel, handler, metadata, handledTarget);
+ } catch (SAXException|SecurityException e) {
+ throw e;
} catch (Exception e) {
- if (e instanceof SAXException) {
- throw e;
- }
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
index 0641a81..57bbfeb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
@@ -129,6 +129,8 @@ public class SXWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
XWPFStylesShim styles = null;
try {
styles = loadStyles(documentPart);
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception e) {
metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING,
ExceptionUtils.getStackTrace(e));
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
index 18b3d1a..c446e0a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
@@ -146,7 +146,7 @@ public class CoreNLPNERecogniser implements NERecogniser {
}
} catch (Exception e) {
- LOG.debug(e.getMessage(), e);
+ LOG.warn(e.getMessage(), e);
}
return names;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 52cbfb1..b2c4496 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -225,8 +225,10 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
}
+ } catch (SecurityException e) {
+ throw e;
} catch (Exception e) {
-
+ //swallow
} finally {
IOUtils.closeQuietly(tmp);
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java
index abfd29e..67eaea8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/StreamingZipContainerDetector.java
@@ -218,7 +218,7 @@ public class StreamingZipContainerDetector extends ZipContainerDetectorBase impl
} catch (SecurityException e) {
throw e;
} catch (Exception e) {
-
+ //swallow
}
return relsHandler.rels;
}
@@ -251,7 +251,7 @@ public class StreamingZipContainerDetector extends ZipContainerDetectorBase impl
} catch (SecurityException e) {
throw e;
} catch (Exception e) {
-
+ //swallow
}
return contentTypeHandler.mediaType;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
index dfc0956..1c6e4eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
@@ -341,7 +341,7 @@ final class TextExtractor {
private static Charset getCharset(String name) {
try {
return CharsetUtils.forName(name);
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
return ASCII;
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java b/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java
index 4a2c56b..9777d2e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/txt/Icu4jEncodingDetector.java
@@ -70,7 +70,7 @@ public class Icu4jEncodingDetector implements EncodingDetector {
for (CharsetMatch match : detector.detectAll()) {
try {
return CharsetUtils.forName(match.getName());
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
// ignore
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java b/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java
index 5e215a9..179466d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/txt/UniversalEncodingListener.java
@@ -75,7 +75,7 @@ class UniversalEncodingListener implements CharsetListener {
}
try {
this.charset = CharsetUtils.forName(name);
- } catch (Exception e) {
+ } catch (IllegalArgumentException e) {
// ignore
}
}