You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/05/16 13:46:26 UTC

[tika] branch master updated (c020e48 -> ed57e6e)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git.

      from  c020e48   TIKA-2360 -- require users to turn on SentimentParser; remove glob detection for .sent; skip unit tests if network call fails.
       new  f78b7d0   clean up white space
       new  ac1791a   clean up indentation
       new  ed57e6e   TIKA-2364 -- convert printstacktrace to log

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "adds" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../tika/parser/external/ExternalParser.java       | 299 +++++++++++----------
 .../org/apache/tika/eval/AbstractProfiler.java     |   8 +-
 .../java/org/apache/tika/eval/ExtractComparer.java |   1 -
 .../java/org/apache/tika/eval/db/JDBCUtil.java     |   1 -
 .../java/org/apache/tika/eval/io/DBWriter.java     |   2 +-
 .../java/org/apache/tika/eval/io/XMLLogReader.java |   9 +-
 .../tika/langdetect/Lingo24LangDetector.java       |   9 +-
 .../apache/tika/langdetect/TextLangDetector.java   |  11 +-
 .../chm/accessor/ChmDirectoryListingSet.java       |   9 +-
 .../apache/tika/parser/chm/core/ChmCommons.java    |  10 +-
 .../apache/tika/parser/chm/core/ChmExtractor.java  |   9 +-
 .../org/apache/tika/parser/gdal/GDALParser.java    |   7 +-
 .../geoinfo/GeographicInformationParser.java       |  35 +--
 .../apache/tika/parser/image/MetadataFields.java   |  12 +-
 .../tika/parser/journal/GrobidRESTParser.java      | 138 +++++-----
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    |  23 +-
 .../tika/parser/ner/grobid/GrobidNERecogniser.java |   3 +-
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     |   4 +-
 .../apache/tika/parser/utils/CommonsDigester.java  |   7 +-
 .../tika/language/translate/GoogleTranslator.java  |   6 +-
 .../tika/language/translate/Lingo24Translator.java |   6 +-
 .../language/translate/MicrosoftTranslator.java    |   6 +-
 .../tika/language/translate/YandexTranslator.java  |   8 +-
 23 files changed, 348 insertions(+), 275 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
commits@tika.apache.org.

[tika] 03/03: TIKA-2364 -- convert printstacktrace to log

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ed57e6edb225948ca10e2fcb25f8e0fce45f8fc0
Author: tballison <ta...@mitre.org>
AuthorDate: Tue May 16 09:46:05 2017 -0400

    TIKA-2364 -- convert printstacktrace to log
---
 .../org/apache/tika/eval/AbstractProfiler.java     |  8 +++--
 .../java/org/apache/tika/eval/ExtractComparer.java |  1 -
 .../java/org/apache/tika/eval/db/JDBCUtil.java     |  1 -
 .../java/org/apache/tika/eval/io/DBWriter.java     |  2 +-
 .../java/org/apache/tika/eval/io/XMLLogReader.java |  9 +-----
 .../tika/langdetect/Lingo24LangDetector.java       |  9 ++++--
 .../apache/tika/langdetect/TextLangDetector.java   | 11 +++++--
 .../chm/accessor/ChmDirectoryListingSet.java       |  9 ++++--
 .../apache/tika/parser/chm/core/ChmCommons.java    | 10 +++++--
 .../apache/tika/parser/chm/core/ChmExtractor.java  |  9 +++++-
 .../org/apache/tika/parser/gdal/GDALParser.java    |  7 +++--
 .../geoinfo/GeographicInformationParser.java       | 35 +++++++++++-----------
 .../apache/tika/parser/image/MetadataFields.java   | 12 +++++---
 .../tika/parser/journal/GrobidRESTParser.java      | 12 ++++++--
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    | 23 +++++++++-----
 .../tika/parser/ner/grobid/GrobidNERecogniser.java |  3 +-
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     |  4 +--
 .../apache/tika/parser/utils/CommonsDigester.java  |  7 ++++-
 .../tika/language/translate/GoogleTranslator.java  |  6 +++-
 .../tika/language/translate/Lingo24Translator.java |  6 +++-
 .../language/translate/MicrosoftTranslator.java    |  6 +++-
 .../tika/language/translate/YandexTranslator.java  |  8 +++--
 22 files changed, 132 insertions(+), 66 deletions(-)

diff --git a/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java b/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
index d2b3153..5029ecf 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
@@ -62,9 +62,14 @@ import org.apache.tika.metadata.PagedText;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.utils.ExceptionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public abstract class AbstractProfiler extends FileResourceConsumer {
 
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractProfiler.class);
+
+
     private static final String[] EXTRACT_EXTENSIONS = {
             ".json",
             ".txt",
@@ -505,8 +510,7 @@ public abstract class AbstractProfiler extends FileResourceConsumer {
                 c = r.read();
             }
         } catch (IOException e) {
-            e.printStackTrace();
-            //swallow
+            LOG.warn("IOException", e);
         }
 
         List<Pair<String, Integer>> pairs = new ArrayList<>();
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java b/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
index d79503b..fd1c382 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
@@ -199,7 +199,6 @@ public class ExtractComparer extends AbstractProfiler {
         try {
             compareFiles(fpsA, fpsB);
         } catch (Throwable e) {
-            e.printStackTrace();
             //this should be cataclysmic...
             throw new RuntimeException("Exception while working on: " +
                     metadata.get(FSProperties.FS_REL_PATH), e);
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/db/JDBCUtil.java b/tika-eval/src/main/java/org/apache/tika/eval/db/JDBCUtil.java
index aaf8403..794c55b 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/db/JDBCUtil.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/db/JDBCUtil.java
@@ -161,7 +161,6 @@ public class JDBCUtil {
             return insertStatement.executeUpdate();
         } catch (SQLException e) {
             LOG.warn("couldn't insert data for this row: {}", e.getMessage());
-            e.printStackTrace();
             return -1;
         }
     }
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/io/DBWriter.java b/tika-eval/src/main/java/org/apache/tika/eval/io/DBWriter.java
index 2b8dbb1..8aea3cd 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/io/DBWriter.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/io/DBWriter.java
@@ -46,6 +46,7 @@ import org.slf4j.LoggerFactory;
  * DBWriter creates its own PreparedStatements at initialization.
  */
 public class DBWriter implements IDBWriter {
+
     private static final Logger LOG = LoggerFactory.getLogger(DBWriter.class);
 
     private static final AtomicInteger WRITER_ID = new AtomicInteger();
@@ -129,7 +130,6 @@ public class DBWriter implements IDBWriter {
         try {
             conn.commit();
         } catch (SQLException e){
-            e.printStackTrace();
             throw new IOExceptionWithCause(e);
         }
         try {
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java b/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
index 5130438..753866b 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
@@ -39,11 +39,6 @@ public class XMLLogReader {
 
     public void read(InputStream xmlLogFileIs, XMLLogMsgHandler handler) throws XMLStreamException {
         InputStream is = new LogXMLWrappingInputStream(xmlLogFileIs);
-       /* try {
-            System.out.println("WRAPPED: " + IOUtils.toString(is)+ "<<WRAPPED");
-        } catch (IOException e) {
-            e.printStackTrace();
-        }*/
         XMLInputFactory factory = new ParseContext().getXMLInputFactory();
         XMLStreamReader reader = factory.createXMLStreamReader(is);
 
@@ -58,10 +53,8 @@ public class XMLLogReader {
                         try {
                             handler.handleMsg(level, reader.getElementText());
                         } catch (IOException e) {
-                            e.printStackTrace();
                             LOG.warn("Error parsing: {}", reader.getElementText());
                         } catch (SQLException e) {
-                            e.printStackTrace();
                             LOG.warn("SQLException: {}", e.getMessage());
                         }
                     }
@@ -70,7 +63,7 @@ public class XMLLogReader {
                     if ("event".equals(reader.getLocalName())) {
                         level = null;
                     } else if ("message".equals(reader.getLocalName())) {
-                        //sdo we care any more?
+                        //do we care any more?
                     }
                     break;
             };
diff --git a/tika-langdetect/src/main/java/org/apache/tika/langdetect/Lingo24LangDetector.java b/tika-langdetect/src/main/java/org/apache/tika/langdetect/Lingo24LangDetector.java
index f89d34e..9ce9548 100644
--- a/tika-langdetect/src/main/java/org/apache/tika/langdetect/Lingo24LangDetector.java
+++ b/tika-langdetect/src/main/java/org/apache/tika/langdetect/Lingo24LangDetector.java
@@ -24,6 +24,8 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.language.detect.LanguageConfidence;
 import org.apache.tika.language.detect.LanguageDetector;
 import org.apache.tika.language.detect.LanguageResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.ws.rs.core.Form;
 import javax.ws.rs.core.MediaType;
@@ -46,6 +48,9 @@ import java.util.Set;
  */
 public class Lingo24LangDetector extends LanguageDetector {
 
+    private static final Logger LOG = LoggerFactory.getLogger(Lingo24LangDetector.class);
+
+
     private static final String LINGO24_TRANSLATE_URL_BASE = "https://api.lingo24.com/mt/v1/";
     private static final String LINGO24_LANGID_ACTION = "langid";
     private static final String LINGO24_SOURCELANG_ACTION = "sourcelangs";
@@ -80,7 +85,7 @@ public class Lingo24LangDetector extends LanguageDetector {
                 this.isAvailable = false;
             }
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("Couldn't load config", e);
             isAvailable = false;
         }
         writer = new CharArrayWriter();
@@ -181,7 +186,7 @@ public class Lingo24LangDetector extends LanguageDetector {
                 languages.add(jsonElement.getAsJsonArray().get(0).getAsString());
             }
         } catch (Throwable e) {
-            e.printStackTrace();
+            LOG.warn("problem detecting", e);
         } finally {
             if (_client != null) {
                 _client.close();
diff --git a/tika-langdetect/src/main/java/org/apache/tika/langdetect/TextLangDetector.java b/tika-langdetect/src/main/java/org/apache/tika/langdetect/TextLangDetector.java
index 89fbfe5..dd1dbbb 100644
--- a/tika-langdetect/src/main/java/org/apache/tika/langdetect/TextLangDetector.java
+++ b/tika-langdetect/src/main/java/org/apache/tika/langdetect/TextLangDetector.java
@@ -23,6 +23,8 @@ import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.tika.language.detect.LanguageConfidence;
 import org.apache.tika.language.detect.LanguageDetector;
 import org.apache.tika.language.detect.LanguageResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.ws.rs.core.Response;
 import java.io.CharArrayWriter;
@@ -41,6 +43,9 @@ import java.util.*;
  */
 public class TextLangDetector extends LanguageDetector {
 
+    private static final Logger LOG = LoggerFactory.getLogger(TextLangDetector.class);
+
+
     private static final String TEXT_REST_HOST = "http://localhost:8000";
     private static final String TEXT_LID_PATH = "/lid";
 
@@ -111,7 +116,7 @@ public class TextLangDetector extends LanguageDetector {
                 languages.add(jsonElement.toString());
             }
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("problem getting and parsing json", e);
         }
         return languages;
     }
@@ -125,7 +130,7 @@ public class TextLangDetector extends LanguageDetector {
             String json = response.readEntity(String.class);
             language = new JsonParser().parse(json).getAsJsonObject().get("language").getAsString();
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("problem detecting", e);
         }
         return language;
     }
@@ -139,7 +144,7 @@ public class TextLangDetector extends LanguageDetector {
             JsonArray jsonArray = new JsonParser().parse(json).getAsJsonObject().get("all_languages").getAsJsonArray();
             return jsonArray.size() != 0;
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("Can't run", e);
             return false;
         }
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
index e96426f..ccb3286 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
@@ -26,11 +26,16 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.parser.chm.core.ChmCommons;
 import org.apache.tika.parser.chm.core.ChmConstants;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Holds chm listing entries
  */
 public class ChmDirectoryListingSet {
+
+    private static final Logger LOG = LoggerFactory.getLogger(ChmDirectoryListingSet.class);
+
     private List<DirectoryListingEntry> dlel;
     private byte[] data;
     private int placeHolder = -1;
@@ -147,7 +152,7 @@ public class ChmDirectoryListingSet {
                 dir_chunk = null;
             }
         } catch (ChmParsingException e) {
-            e.printStackTrace();
+            LOG.warn("Chm parse exception", e);
         } finally {
             setData(null);
         }
@@ -312,7 +317,7 @@ public class ChmDirectoryListingSet {
             }
 
 //        } catch (Exception e) {
-//            e.printStackTrace();
+//                LOG.warn("problem parsing", e);
 //        }
     }
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
index a9d2454..0bbdbe1 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
@@ -26,8 +26,14 @@ import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
 import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
 import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.exception.ChmParsingException;
+import org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class ChmCommons {
+
+    private static final Logger LOG = LoggerFactory.getLogger(ChmCommons.class);
+
     /* Prevents initialization */
     private ChmCommons() {
     }
@@ -216,14 +222,14 @@ public class ChmCommons {
             } catch (FileNotFoundException e) {
                 throw new TikaException(e.getMessage());
             } catch (IOException e) {
-                e.printStackTrace();
+                LOG.warn("problem writing tmp file", e);
             } finally {
                 if (output != null)
                     try {
                         output.flush();
                         output.close();
                     } catch (IOException e) {
-                        e.printStackTrace();
+                        LOG.warn("problem writing tmp file", e);
                     }
             }
         }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
index c1e4495..faadc4d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
@@ -36,11 +36,18 @@ import org.apache.tika.parser.chm.assertion.ChmAssert;
 import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
 import org.apache.tika.parser.chm.lzx.ChmBlockInfo;
 import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
+import org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Extracts text from chm file. Enumerates chm entries.
  */
 public class ChmExtractor {
+
+    private static final Logger LOG = LoggerFactory.getLogger(ChmExtractor.class);
+
+
     private List<ChmLzxBlock> lzxBlocksCache = null;
     private ChmDirectoryListingSet chmDirList = null;
     private ChmItsfHeader chmItsfHeader = null;
@@ -216,7 +223,7 @@ public class ChmExtractor {
             setLzxBlocksCache(new ArrayList<ChmLzxBlock>());
 
         } catch (IOException e) {
-            e.printStackTrace();
+            LOG.warn("IOException parsing chm file", e);
         }
     }
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
index aba00fa..66d1c5f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
@@ -39,6 +39,8 @@ import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -67,6 +69,7 @@ import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN;
 public class GDALParser extends AbstractParser {
 
     private static final long serialVersionUID = -3869130527323941401L;
+    private static final Logger LOG = LoggerFactory.getLogger(GDALParser.class);
 
     private String command;
 
@@ -91,7 +94,7 @@ public class GDALParser extends AbstractParser {
                         .getPath());
             }
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("exception processing command", e);
         }
 
         return pCommand;
@@ -367,7 +370,7 @@ public class GDALParser extends AbstractParser {
             try {
                 output = extractOutput(out);
             } catch (Exception e) {
-                e.printStackTrace();
+                LOG.warn("Exception extracting output", e);
                 output = "";
             }
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
index 39fb94e..27b8040 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/geoinfo/GeographicInformationParser.java
@@ -65,12 +65,17 @@ import org.opengis.metadata.identification.Keywords;
 import org.opengis.metadata.identification.Progress;
 import org.opengis.metadata.identification.TopicCategory;
 import org.opengis.util.InternationalString;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 
 public class GeographicInformationParser extends AbstractParser{
 
+    private static final Logger LOG = LoggerFactory.getLogger(GeographicInformationParser.class);
+
+
     public static final String geoInfoType="text/iso19139+xml";
     private final Set<MediaType> SUPPORTED_TYPES =
             Collections.singleton(MediaType.text("iso19139+xml"));
@@ -101,7 +106,7 @@ public class GeographicInformationParser extends AbstractParser{
         } catch (UnsupportedStorageException e) {
             throw new TikaException("UnsupportedStorageException",e);
         } catch (DataStoreException e) {
-            throw new TikaException("DataStoreException",e);
+            throw new TikaException("DataStoreException", e);
         } finally {
             if (tmp != null) {
                 tmp.dispose();
@@ -109,22 +114,18 @@ public class GeographicInformationParser extends AbstractParser{
         }
     }
 
-    private void extract(XHTMLContentHandler xhtmlContentHandler,Metadata metadata, DefaultMetadata defaultMetadata) throws SAXException{
-        try {
-            getMetaDataCharacterSet(metadata, defaultMetadata);
-            getMetaDataContact(metadata, defaultMetadata);
-            getMetaDataIdentificationInfo(metadata, defaultMetadata);
-            getMetaDataDistributionInfo(metadata, defaultMetadata);
-            getMetaDataDateInfo(metadata, defaultMetadata);
-            getMetaDataResourceScope(metadata, defaultMetadata);
-            getMetaDataParentMetaDataTitle(metadata, defaultMetadata);
-            getMetaDataIdetifierCode(metadata, defaultMetadata);
-            getMetaDataStandard(metadata, defaultMetadata);
-            extractContent(xhtmlContentHandler, defaultMetadata);
-        }
-        catch(Exception e){
-            e.printStackTrace();
-        }
+    private void extract(XHTMLContentHandler xhtmlContentHandler, Metadata metadata,
+                         DefaultMetadata defaultMetadata) throws SAXException {
+        getMetaDataCharacterSet(metadata, defaultMetadata);
+        getMetaDataContact(metadata, defaultMetadata);
+        getMetaDataIdentificationInfo(metadata, defaultMetadata);
+        getMetaDataDistributionInfo(metadata, defaultMetadata);
+        getMetaDataDateInfo(metadata, defaultMetadata);
+        getMetaDataResourceScope(metadata, defaultMetadata);
+        getMetaDataParentMetaDataTitle(metadata, defaultMetadata);
+        getMetaDataIdetifierCode(metadata, defaultMetadata);
+        getMetaDataStandard(metadata, defaultMetadata);
+        extractContent(xhtmlContentHandler, defaultMetadata);
     }
 
     private void extractContent(XHTMLContentHandler xhtmlContentHandler, DefaultMetadata defaultMetadata) throws SAXException{
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/MetadataFields.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/MetadataFields.java
index 5238751..c7c1440 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/MetadataFields.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/MetadataFields.java
@@ -23,6 +23,8 @@ import java.util.HashSet;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Knowns about all declared {@link Metadata} fields.
@@ -31,6 +33,8 @@ import org.apache.tika.metadata.TikaCoreProperties;
  */
 public abstract class MetadataFields {
 
+    private static final Logger LOG = LoggerFactory.getLogger(MetadataFields.class);
+
     private static HashSet<String> known;
 
     static {
@@ -52,9 +56,9 @@ public abstract class MetadataFields {
                             known.add(p);
                         }
                     } catch (IllegalArgumentException e) {
-                        e.printStackTrace();
+                        LOG.warn("Illegal argument in field", e);
                     } catch (IllegalAccessException e) {
-                        e.printStackTrace();
+                        LOG.warn("Illegal access in field", e);
                     }
                 }
                 if (Property.class.isAssignableFrom(c)) {
@@ -64,9 +68,9 @@ public abstract class MetadataFields {
                             known.add(p.getName());
                         }
                     } catch (IllegalArgumentException e) {
-                        e.printStackTrace();
+                        LOG.warn("Illegal argument in field", e);
                     } catch (IllegalAccessException e) {
-                        e.printStackTrace();
+                        LOG.warn("Illegal access in field", e);
                     }
                 }
             }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
index 22526ff..f1d6924 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
@@ -32,10 +32,16 @@ import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition;
 import org.apache.cxf.jaxrs.ext.multipart.MultipartBody;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
 
 public class GrobidRESTParser {
 
+    private static final Logger LOG = LoggerFactory.getLogger(GrobidRESTParser.class);
+
+
     private static final String GROBID_REST_HOST = "http://localhost:8080";
 
     private static final String GROBID_ISALIVE_PATH = "/grobid"; // isalive
@@ -51,7 +57,7 @@ public class GrobidRESTParser {
         try {
             restHostUrlStr = readRestUrl();
         } catch (IOException e) {
-            e.printStackTrace();
+            LOG.warn("can't read rest url", e);
         }
 
         if (restHostUrlStr == null
@@ -83,7 +89,7 @@ public class GrobidRESTParser {
                 metadata.add("grobid:header_" + key, teiMet.get(key));
             }
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("Couldn't read response", e);
         }
     }
 
@@ -104,7 +110,7 @@ public class GrobidRESTParser {
             String resp = response.readEntity(String.class);
             return resp != null && !resp.equals("") && resp.startsWith("<h4>");
         } catch (Exception e) {
-            e.printStackTrace();
+            //swallow...can't run
             return false;
         }
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
index 7466d09..7a5c0c7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
@@ -43,6 +43,8 @@ import org.apache.tika.parser.microsoft.ooxml.ParagraphProperties;
 import org.apache.tika.parser.microsoft.ooxml.RunProperties;
 import org.apache.tika.parser.microsoft.ooxml.XWPFListManager;
 import org.apache.xmlbeans.XmlException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.XMLReader;
@@ -54,6 +56,8 @@ import org.xml.sax.XMLReader;
  */
 public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
 
+    private static final Logger LOG = LoggerFactory.getLogger(XWPFEventBasedWordExtractor.class);
+
     private OPCPackage container;
     private POIXMLProperties properties;
 
@@ -108,9 +112,10 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
                 try {
                     handleDocumentPart(pp, sb);
                 } catch (IOException e) {
-                    e.printStackTrace();
+                    LOG.warn("IOException handling document part", e);
                 } catch (SAXException e) {
-                    e.printStackTrace();
+                    //swallow this because we don't actually call it
+                    LOG.warn("SAXException handling document part", e);
                 }
             }
         }
@@ -123,9 +128,10 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
                 try {
                     handleDocumentPart(pp, sb);
                 } catch (IOException e) {
-                    e.printStackTrace();
+                    LOG.warn("IOException handling glossary document part", e);
                 } catch (SAXException e) {
-                    e.printStackTrace();
+                    //swallow this because we don't actually call it
+                    LOG.warn("SAXException handling glossary document part", e);
                 }
             }
         }
@@ -150,7 +156,7 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
                 }
             }
         } catch (InvalidFormatException e) {
-            //swallow
+            LOG.warn("Invalid format", e);
         }
 
         //main document
@@ -172,7 +178,7 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
                     }
                 }
             } catch (InvalidFormatException e) {
-                //swallow
+                LOG.warn("Invalid format", e);
             }
         }
     }
@@ -188,7 +194,7 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
             reader.parse(new InputSource(new CloseShieldInputStream(stream)));
 
         } catch (ParserConfigurationException e) {
-            e.printStackTrace();
+            LOG.warn("Can't configure XMLReader", e);
         }
 
     }
@@ -209,6 +215,7 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
                 }
             }
         } catch (InvalidFormatException e) {
+            LOG.warn("Invalid format", e);
         }
         return hyperlinks;
     }
@@ -228,7 +235,7 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
                 return new XWPFNumbering(numberingPart);
             }
         } catch (IOException | OpenXML4JException e) {
-            //swallow
+            LOG.warn("Couldn't load numbering", e);
         }
         return null;
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java
index 06d0112..cf97194 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java
@@ -55,7 +55,8 @@ public class GrobidNERecogniser implements NERecogniser{
 	            try {
 	                restHostUrlStr = readRestUrl();
 	            } catch (IOException e) {
-	                e.printStackTrace();
+	            	LOG.warn("couldn't read rest url", e);
+
 	            }
 
 	            if (restHostUrlStr == null || restHostUrlStr.equals("")) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
index 4ca723b..e7b3638 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -64,7 +64,7 @@ public class NLTKNERecogniser implements NERecogniser {
             try {
                 restHostUrlStr = readRestUrl();
             } catch (IOException e) {
-                e.printStackTrace();
+                LOG.warn("Can't read rest url", e);
             }
 
             if (restHostUrlStr == null || restHostUrlStr.equals("")) {
@@ -83,7 +83,7 @@ public class NLTKNERecogniser implements NERecogniser {
             }
 
         } catch (Exception e) {
-            LOG.debug(e.getMessage(), e);
+            LOG.warn(e.getMessage(), e);
         }
     }
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
index 3a01740..0d2c5df 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
@@ -37,6 +37,8 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.DigestingParser;
 import org.apache.tika.parser.ParseContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Implementation of {@link org.apache.tika.parser.DigestingParser.Digester}
@@ -53,6 +55,9 @@ import org.apache.tika.parser.ParseContext;
  */
 public class CommonsDigester implements DigestingParser.Digester {
 
+    private static final Logger LOG = LoggerFactory.getLogger(CommonsDigester.class);
+
+
     public enum DigestAlgorithm {
         //those currently available in commons.digest
         MD2,
@@ -178,7 +183,7 @@ public class CommonsDigester implements DigestingParser.Digester {
                     throw new IllegalArgumentException("Sorry, not aware of algorithm: " + algorithm.toString());
             }
         } catch (IOException e) {
-            e.printStackTrace();
+            LOG.warn("Problem digesting", e);
             //swallow, or should we throw this?
         }
         if (is instanceof SimpleBoundedInputStream) {
diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
index cdab2ad..a2c029c 100644
--- a/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
+++ b/tika-translate/src/main/java/org/apache/tika/language/translate/GoogleTranslator.java
@@ -32,6 +32,8 @@ import org.apache.tika.exception.TikaException;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * An implementation of a REST client to the <a
@@ -45,6 +47,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
  */
 public class GoogleTranslator extends AbstractTranslator {
 
+	private static final Logger LOG = LoggerFactory.getLogger(GoogleTranslator.class);
+
 	private static final String GOOGLE_TRANSLATE_URL_BASE = "https://www.googleapis.com/language/translate/v2";
 
 	private static final String DEFAULT_KEY = "dummy-secret";
@@ -67,7 +71,7 @@ public class GoogleTranslator extends AbstractTranslator {
 			if (this.apiKey.equals(DEFAULT_KEY))
 				this.isAvailable = false;
 		} catch (Exception e) {
-			e.printStackTrace();
+			LOG.warn("Exception reading config file", e);
 			isAvailable = false;
 		}
 	}
diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
index 22589d9..d8a81b3 100644
--- a/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
+++ b/tika-translate/src/main/java/org/apache/tika/language/translate/Lingo24Translator.java
@@ -33,6 +33,8 @@ import org.apache.tika.exception.TikaException;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * An implementation of a REST client for the
@@ -42,6 +44,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
  */
 public class Lingo24Translator extends AbstractTranslator {
 
+    private static final Logger LOG = LoggerFactory.getLogger(Lingo24Translator.class);
+
     private static final String LINGO24_TRANSLATE_URL_BASE = "https://api.lingo24.com/mt/v1/translate";
 
     private static final String DEFAULT_KEY = "dummy-key";
@@ -64,7 +68,7 @@ public class Lingo24Translator extends AbstractTranslator {
             if (this.userKey.equals(DEFAULT_KEY))
                 this.isAvailable = false;
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("Couldn't read config file", e);
             isAvailable = false;
         }
     }
diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/MicrosoftTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/MicrosoftTranslator.java
index b70da55..b05e98e 100644
--- a/tika-translate/src/main/java/org/apache/tika/language/translate/MicrosoftTranslator.java
+++ b/tika-translate/src/main/java/org/apache/tika/language/translate/MicrosoftTranslator.java
@@ -20,6 +20,8 @@ package org.apache.tika.language.translate;
 import com.memetix.mst.language.Language;
 import com.memetix.mst.translate.Translate;
 import org.apache.tika.exception.TikaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -32,6 +34,8 @@ import java.util.Properties;
  */
 public class MicrosoftTranslator implements Translator {
 
+    private static final Logger LOG = LoggerFactory.getLogger(MicrosoftTranslator.class);
+
     boolean available;              // Flag for whether or not translation is available.
     String clientId, clientSecret;  // Keys used for the API calls.
 
@@ -60,7 +64,7 @@ public class MicrosoftTranslator implements Translator {
                 this.available = checkAvailable();   
             }
         } catch (IOException e) {
-        	e.printStackTrace();
+            LOG.warn("Error loading props file", e);
             // Error with properties file. Translation will not work.
             available = false;
         }
diff --git a/tika-translate/src/main/java/org/apache/tika/language/translate/YandexTranslator.java b/tika-translate/src/main/java/org/apache/tika/language/translate/YandexTranslator.java
index dc0d14c..d1960fc 100644
--- a/tika-translate/src/main/java/org/apache/tika/language/translate/YandexTranslator.java
+++ b/tika-translate/src/main/java/org/apache/tika/language/translate/YandexTranslator.java
@@ -32,6 +32,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.language.translate.Translator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
@@ -41,7 +43,9 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * and set your Application's User Key in the <code>translator.yandex.properties</code> file.
  */
 public class YandexTranslator implements Translator {
-    
+
+    private static final Logger LOG = LoggerFactory.getLogger(YandexTranslator.class);
+
     /**
      * Yandex Translate API service end-point URL
      */
@@ -72,7 +76,7 @@ public class YandexTranslator implements Translator {
             this.apiKey = config.getProperty("translator.api-key");
             this.format = config.getProperty("translator.text.format");
         } catch (Exception e) {
-            e.printStackTrace();
+            LOG.warn("Exception loading Yandex config", e);
         }
     }
 

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 02/03: clean up indentation

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ac1791af88e55fea8b1ff65987e32af93f528359
Author: tballison <ta...@mitre.org>
AuthorDate: Tue May 16 08:35:23 2017 -0400

    clean up indentation
---
 .../tika/parser/journal/GrobidRESTParser.java      | 132 ++++++++++-----------
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
index 05b09fc..22526ff 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
@@ -5,9 +5,9 @@
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -36,77 +36,77 @@ import org.xml.sax.ContentHandler;
 
 public class GrobidRESTParser {
 
-  private static final String GROBID_REST_HOST = "http://localhost:8080";
+    private static final String GROBID_REST_HOST = "http://localhost:8080";
 
-  private static final String GROBID_ISALIVE_PATH = "/grobid"; // isalive
-                                                               // doesn't work
-                                                               // nfc why
+    private static final String GROBID_ISALIVE_PATH = "/grobid"; // isalive
+    // doesn't work
+    // nfc why
 
-  private static final String GROBID_PROCESSHEADER_PATH = "/processHeaderDocument";
+    private static final String GROBID_PROCESSHEADER_PATH = "/processHeaderDocument";
 
-  private String restHostUrlStr;
+    private String restHostUrlStr;
 
-  public GrobidRESTParser() {
-    String restHostUrlStr = null;
-    try {
-      restHostUrlStr = readRestUrl();
-    } catch (IOException e) {
-      e.printStackTrace();
+    public GrobidRESTParser() {
+        String restHostUrlStr = null;
+        try {
+            restHostUrlStr = readRestUrl();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+        if (restHostUrlStr == null
+                || (restHostUrlStr != null && restHostUrlStr.equals(""))) {
+            this.restHostUrlStr = GROBID_REST_HOST;
+        } else {
+            this.restHostUrlStr = restHostUrlStr;
+        }
     }
 
-    if (restHostUrlStr == null
-        || (restHostUrlStr != null && restHostUrlStr.equals(""))) {
-      this.restHostUrlStr = GROBID_REST_HOST;
-    } else {
-      this.restHostUrlStr = restHostUrlStr;
+    public void parse(String filePath, ContentHandler handler, Metadata metadata,
+                      ParseContext context) throws FileNotFoundException {
+
+        File pdfFile = new File(filePath);
+        ContentDisposition cd = new ContentDisposition(
+                "form-data; name=\"input\"; filename=\"" + pdfFile.getName() + "\"");
+        Attachment att = new Attachment("input", new FileInputStream(pdfFile), cd);
+        MultipartBody body = new MultipartBody(att);
+
+        Response response = WebClient
+                .create(restHostUrlStr + GROBID_PROCESSHEADER_PATH)
+                .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA)
+                .post(body);
+
+        try {
+            String resp = response.readEntity(String.class);
+            Metadata teiMet = new TEIParser().parse(resp);
+            for (String key : teiMet.names()) {
+                metadata.add("grobid:header_" + key, teiMet.get(key));
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
     }
-  }
-
-  public void parse(String filePath, ContentHandler handler, Metadata metadata,
-      ParseContext context) throws FileNotFoundException {
-
-    File pdfFile = new File(filePath);
-    ContentDisposition cd = new ContentDisposition(
-        "form-data; name=\"input\"; filename=\"" + pdfFile.getName() + "\"");
-    Attachment att = new Attachment("input", new FileInputStream(pdfFile), cd);
-    MultipartBody body = new MultipartBody(att);
-
-    Response response = WebClient
-        .create(restHostUrlStr + GROBID_PROCESSHEADER_PATH)
-        .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA)
-        .post(body);
-
-    try {
-      String resp = response.readEntity(String.class);
-      Metadata teiMet = new TEIParser().parse(resp);
-      for (String key : teiMet.names()) {
-        metadata.add("grobid:header_" + key, teiMet.get(key));
-      }
-    } catch (Exception e) {
-      e.printStackTrace();
+
+    private static String readRestUrl() throws IOException {
+        Properties grobidProperties = new Properties();
+        grobidProperties.load(GrobidRESTParser.class
+                .getResourceAsStream("GrobidExtractor.properties"));
+
+        return grobidProperties.getProperty("grobid.server.url");
     }
-  }
-
-  private static String readRestUrl() throws IOException {
-    Properties grobidProperties = new Properties();
-    grobidProperties.load(GrobidRESTParser.class
-        .getResourceAsStream("GrobidExtractor.properties"));
-
-    return grobidProperties.getProperty("grobid.server.url");
-  }
-
-  protected static boolean canRun() {
-    Response response = null;
-
-    try {
-      response = WebClient.create(readRestUrl() + GROBID_ISALIVE_PATH)
-          .accept(MediaType.TEXT_HTML).get();
-      String resp = response.readEntity(String.class);
-      return resp != null && !resp.equals("") && resp.startsWith("<h4>");
-    } catch (Exception e) {
-      e.printStackTrace();
-      return false;
+
+    protected static boolean canRun() {
+        Response response = null;
+
+        try {
+            response = WebClient.create(readRestUrl() + GROBID_ISALIVE_PATH)
+                    .accept(MediaType.TEXT_HTML).get();
+            String resp = response.readEntity(String.class);
+            return resp != null && !resp.equals("") && resp.startsWith("<h4>");
+        } catch (Exception e) {
+            e.printStackTrace();
+            return false;
+        }
     }
-  }
 
 }

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 01/03: clean up white space

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit f78b7d0f16a86d86f18332b120e5f131c62948fb
Author: tballison <ta...@mitre.org>
AuthorDate: Tue May 16 08:30:47 2017 -0400

    clean up white space
---
 .../tika/parser/external/ExternalParser.java       | 299 +++++++++++----------
 1 file changed, 153 insertions(+), 146 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
index 1cbff91..ce539f6 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
@@ -49,17 +49,19 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 
 /**
  * Parser that uses an external program (like catdoc or pdf2txt) to extract
- *  text content and metadata from a given document.
+ * text content and metadata from a given document.
  */
 public class ExternalParser extends AbstractParser {
 
     /**
      * Consumer contract
+     *
      * @since Apache Tika 1.14
      */
     public interface LineConsumer extends Serializable {
         /**
          * Consume a line
+         *
          * @param line a line of string
          */
         void consume(String line);
@@ -76,16 +78,16 @@ public class ExternalParser extends AbstractParser {
     }
 
     private static final long serialVersionUID = -1079128990650687037L;
-    
+
     /**
      * The token, which if present in the Command string, will
-     *  be replaced with the input filename. 
+     * be replaced with the input filename.
      * Alternately, the input data can be streamed over STDIN.
      */
     public static final String INPUT_FILE_TOKEN = "${INPUT}";
     /**
      * The token, which if present in the Command string, will
-     *  be replaced with the output filename. 
+     * be replaced with the output filename.
      * Alternately, the output data can be collected on STDOUT.
      */
     public static final String OUTPUT_FILE_TOKEN = "${OUTPUT}";
@@ -94,18 +96,19 @@ public class ExternalParser extends AbstractParser {
      * Media types supported by the external program.
      */
     private Set<MediaType> supportedTypes = Collections.emptySet();
-    
+
     /**
      * Regular Expressions to run over STDOUT to
-     *  extract Metadata.
+     * extract Metadata.
      */
-    private Map<Pattern,String> metadataPatterns = null;
+    private Map<Pattern, String> metadataPatterns = null;
 
     /**
      * The external command to invoke.
+     *
      * @see Runtime#exec(String[])
      */
-    private String[] command = new String[] { "cat" };
+    private String[] command = new String[]{"cat"};
 
     /**
      * A consumer for ignored Lines
@@ -122,7 +125,7 @@ public class ExternalParser extends AbstractParser {
 
     public void setSupportedTypes(Set<MediaType> supportedTypes) {
         this.supportedTypes =
-            Collections.unmodifiableSet(new HashSet<MediaType>(supportedTypes));
+                Collections.unmodifiableSet(new HashSet<MediaType>(supportedTypes));
     }
 
 
@@ -132,8 +135,9 @@ public class ExternalParser extends AbstractParser {
 
     /**
      * Sets the command to be run. This can include either of
-     *  {@link #INPUT_FILE_TOKEN} or {@link #OUTPUT_FILE_TOKEN}
-     *  if the command needs filenames.
+     * {@link #INPUT_FILE_TOKEN} or {@link #OUTPUT_FILE_TOKEN}
+     * if the command needs filenames.
+     *
      * @see Runtime#exec(String[])
      */
     public void setCommand(String... command) {
@@ -142,6 +146,7 @@ public class ExternalParser extends AbstractParser {
 
     /**
      * Gets lines consumer
+     *
      * @return consumer instance
      */
     public LineConsumer getIgnoredLineConsumer() {
@@ -150,39 +155,40 @@ public class ExternalParser extends AbstractParser {
 
     /**
      * Set a consumer for the lines ignored by the parse functions
+     *
      * @param ignoredLineConsumer consumer instance
      */
     public void setIgnoredLineConsumer(LineConsumer ignoredLineConsumer) {
         this.ignoredLineConsumer = ignoredLineConsumer;
     }
 
-    public Map<Pattern,String> getMetadataExtractionPatterns() {
-       return metadataPatterns;
+    public Map<Pattern, String> getMetadataExtractionPatterns() {
+        return metadataPatterns;
     }
-    
+
     /**
      * Sets the map of regular expression patterns and Metadata
-     *  keys. Any matching patterns will have the matching
-     *  metadata entries set.
+     * keys. Any matching patterns will have the matching
+     * metadata entries set.
      * Set this to null to disable Metadata extraction.
      */
-    public void setMetadataExtractionPatterns(Map<Pattern,String> patterns) {
-       this.metadataPatterns = patterns;
+    public void setMetadataExtractionPatterns(Map<Pattern, String> patterns) {
+        this.metadataPatterns = patterns;
     }
-    
+
 
     /**
      * Executes the configured external command and passes the given document
-     *  stream as a simple XHTML document to the given SAX content handler.
+     * stream as a simple XHTML document to the given SAX content handler.
      * Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
-     *  has been called to set patterns.
+     * has been called to set patterns.
      */
     public void parse(
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         XHTMLContentHandler xhtml =
-            new XHTMLContentHandler(handler, metadata);
+                new XHTMLContentHandler(handler, metadata);
 
         TemporaryResources tmp = new TemporaryResources();
         try {
@@ -211,57 +217,56 @@ public class ExternalParser extends AbstractParser {
             cmd = new String[command.length];
             System.arraycopy(command, 0, cmd, 0, command.length);
         }
-        for(int i=0; i<cmd.length; i++) {
-           if(cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
-              cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
-              inputToStdIn = false;
-           }
-           if(cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
-              output = tmp.createTemporaryFile();
-              outputFromStdOut = false;
-              cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
-           }
+        for (int i = 0; i < cmd.length; i++) {
+            if (cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
+                cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
+                inputToStdIn = false;
+            }
+            if (cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
+                output = tmp.createTemporaryFile();
+                outputFromStdOut = false;
+                cmd[i] = cmd[i].replace(OUTPUT_FILE_TOKEN, output.getPath());
+            }
         }
 
         // Execute
         Process process = null;
-      try{
-        if(cmd.length == 1) {
-           process = Runtime.getRuntime().exec( cmd[0] );
-        } else {
-           process = Runtime.getRuntime().exec( cmd );
+        try {
+            if (cmd.length == 1) {
+                process = Runtime.getRuntime().exec(cmd[0]);
+            } else {
+                process = Runtime.getRuntime().exec(cmd);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
         }
-      }
-      catch(Exception e){
-    	  e.printStackTrace();
-      }
 
         try {
-            if(inputToStdIn) {
-               sendInput(process, stream);
+            if (inputToStdIn) {
+                sendInput(process, stream);
             } else {
-               process.getOutputStream().close();
+                process.getOutputStream().close();
             }
 
             InputStream out = process.getInputStream();
             InputStream err = process.getErrorStream();
-            
-            if(hasPatterns) {
-               extractMetadata(err, metadata);
-               
-               if(outputFromStdOut) {
-                  extractOutput(out, xhtml);
-               } else {
-                  extractMetadata(out, metadata);
-               }
+
+            if (hasPatterns) {
+                extractMetadata(err, metadata);
+
+                if (outputFromStdOut) {
+                    extractOutput(out, xhtml);
+                } else {
+                    extractMetadata(out, metadata);
+                }
             } else {
-               ignoreStream(err);
-               
-               if(outputFromStdOut) {
-                  extractOutput(out, xhtml);
-               } else {
-                  ignoreStream(out);
-               }
+                ignoreStream(err);
+
+                if (outputFromStdOut) {
+                    extractOutput(out, xhtml);
+                } else {
+                    ignoreStream(out);
+                }
             }
         } finally {
             try {
@@ -282,9 +287,9 @@ public class ExternalParser extends AbstractParser {
      * The standard output stream is closed once fully processed.
      *
      * @param process process
-     * @param xhtml XHTML content handler
+     * @param xhtml   XHTML content handler
      * @throws SAXException if the XHTML SAX events could not be handled
-     * @throws IOException if an input error occurred
+     * @throws IOException  if an input error occurred
      */
     private void extractOutput(InputStream stream, XHTMLContentHandler xhtml)
             throws SAXException, IOException {
@@ -308,7 +313,7 @@ public class ExternalParser extends AbstractParser {
      * closed by this method.
      *
      * @param process process
-     * @param stream input stream
+     * @param stream  input stream
      */
     private void sendInput(final Process process, final InputStream stream) {
         Thread t = new Thread() {
@@ -321,10 +326,10 @@ public class ExternalParser extends AbstractParser {
             }
         };
         t.start();
-        try{
-     	   t.join();
+        try {
+            t.join();
+        } catch (InterruptedException ignore) {
         }
-        catch(InterruptedException ignore){}        
     }
 
 
@@ -333,6 +338,7 @@ public class ExternalParser extends AbstractParser {
      * standard stream of the given process. Potential exceptions
      * are ignored, and the stream is closed once fully processed.
      * Note: calling this starts a new thread and blocks the current(caller) thread until the new thread dies
+     *
      * @param stream stream to be ignored
      */
     private static void ignoreStream(final InputStream stream) {
@@ -343,7 +349,8 @@ public class ExternalParser extends AbstractParser {
      * Starts a thread that reads and discards the contents of the
      * standard stream of the given process. Potential exceptions
      * are ignored, and the stream is closed once fully processed.
-     * @param stream stream to sent to black hole (a k a null)
+     *
+     * @param stream       stream to sent to black hole (a k a null)
      * @param waitForDeath when {@code true} the caller thread will be blocked till the death of new thread.
      * @return The thread that is created and started
      */
@@ -362,99 +369,99 @@ public class ExternalParser extends AbstractParser {
         if (waitForDeath) {
             try {
                 t.join();
-            } catch (InterruptedException ignore) {}
+            } catch (InterruptedException ignore) {
+            }
         }
         return t;
     }
-    
+
     private void extractMetadata(final InputStream stream, final Metadata metadata) {
-       Thread t = new Thread() {
-          public void run() {
-             BufferedReader reader;
-              reader = new BufferedReader(new InputStreamReader(stream, UTF_8));
-             try {
-                String line;
-                while ( (line = reader.readLine()) != null ) {
-                    boolean consumed = false;
-                   for(Pattern p : metadataPatterns.keySet()) {
-                      Matcher m = p.matcher(line);
-                      if(m.find()) {
-                          consumed = true;
-                    	 if (metadataPatterns.get(p) != null && 
-                    			 !metadataPatterns.get(p).equals("")){
-                                   metadata.add( metadataPatterns.get(p), m.group(1) );
-                    	 }
-                    	 else{
-                    		 metadata.add( m.group(1), m.group(2));
-                    	 }
-                      }
-                   }
-                    if (!consumed) {
-                        ignoredLineConsumer.consume(line);
+        Thread t = new Thread() {
+            public void run() {
+                BufferedReader reader;
+                reader = new BufferedReader(new InputStreamReader(stream, UTF_8));
+                try {
+                    String line;
+                    while ((line = reader.readLine()) != null) {
+                        boolean consumed = false;
+                        for (Pattern p : metadataPatterns.keySet()) {
+                            Matcher m = p.matcher(line);
+                            if (m.find()) {
+                                consumed = true;
+                                if (metadataPatterns.get(p) != null &&
+                                        !metadataPatterns.get(p).equals("")) {
+                                    metadata.add(metadataPatterns.get(p), m.group(1));
+                                } else {
+                                    metadata.add(m.group(1), m.group(2));
+                                }
+                            }
+                        }
+                        if (!consumed) {
+                            ignoredLineConsumer.consume(line);
+                        }
                     }
+                } catch (IOException e) {
+                    // Ignore
+                } finally {
+                    IOUtils.closeQuietly(reader);
+                    IOUtils.closeQuietly(stream);
                 }
-             } catch (IOException e) {
-                 // Ignore
-             } finally {
-                IOUtils.closeQuietly(reader);
-                IOUtils.closeQuietly(stream);
             }
-          }
-       };
-	   t.start();
-       try{
-    	   t.join();
-       }
-       catch(InterruptedException ignore){}
+        };
+        t.start();
+        try {
+            t.join();
+        } catch (InterruptedException ignore) {
+        }
     }
-    
+
     /**
      * Checks to see if the command can be run. Typically used with
-     *  something like "myapp --version" to check to see if "myapp"
-     *  is installed and on the path.
-     *  
-     * @param checkCmd The check command to run
-     * @param errorValue What is considered an error value? 
+     * something like "myapp --version" to check to see if "myapp"
+     * is installed and on the path.
+     *
+     * @param checkCmd   The check command to run
+     * @param errorValue What is considered an error value?
      */
     public static boolean check(String checkCmd, int... errorValue) {
-       return check(new String[] {checkCmd}, errorValue);
+        return check(new String[]{checkCmd}, errorValue);
     }
 
     public static boolean check(String[] checkCmd, int... errorValue) {
-       if(errorValue.length == 0) {
-          errorValue = new int[] { 127 };
-       }
-       
-       try {
-          Process process= Runtime.getRuntime().exec(checkCmd);
-          Thread stdErrSuckerThread = ignoreStream(process.getErrorStream(), false);
-          Thread stdOutSuckerThread = ignoreStream(process.getInputStream(), false);
-          stdErrSuckerThread.join();
-          stdOutSuckerThread.join();
-          int result = process.waitFor();
-          for(int err : errorValue) {
-             if(result == err) return false;
-          }
-          return true;
-       } catch(IOException e) {
-          // Some problem, command is there or is broken
-          return false;
-       } catch (InterruptedException ie) {
-          // Some problem, command is there or is broken
-          return false;
-       } catch (SecurityException se) {
-          // External process execution is banned by the security manager
-          return false;
-       } catch (Error err) {
-           if (err.getMessage() != null && 
-               (err.getMessage().contains("posix_spawn") || 
-               err.getMessage().contains("UNIXProcess"))) {
-               //"Error forking command due to JVM locale bug 
-               //(see TIKA-1526 and SOLR-6387)"
-               return false;
-           }
-           //throw if a different kind of error
-           throw err;
-       }
+        if (errorValue.length == 0) {
+            errorValue = new int[]{127};
+        }
+
+        try {
+            Process process = Runtime.getRuntime().exec(checkCmd);
+            Thread stdErrSuckerThread = ignoreStream(process.getErrorStream(), false);
+            Thread stdOutSuckerThread = ignoreStream(process.getInputStream(), false);
+            stdErrSuckerThread.join();
+            stdOutSuckerThread.join();
+            int result = process.waitFor();
+            for (int err : errorValue) {
+                if (result == err) return false;
+            }
+            return true;
+        } catch (IOException e) {
+            // Some problem, command is there or is broken
+            return false;
+        } catch (InterruptedException ie) {
+            // Some problem, command is there or is broken
+            return false;
+        } catch (SecurityException se) {
+            // External process execution is banned by the security manager
+            return false;
+        } catch (Error err) {
+            if (err.getMessage() != null &&
+                    (err.getMessage().contains("posix_spawn") ||
+                            err.getMessage().contains("UNIXProcess"))) {
+                //"Error forking command due to JVM locale bug
+                //(see TIKA-1526 and SOLR-6387)"
+                return false;
+            }
+            //throw if a different kind of error
+            throw err;
+        }
     }
 }

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.