You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/04/14 15:02:12 UTC

svn commit: r933936 - in /lucene/tika/trunk/tika-parsers: pom.xml src/main/java/org/apache/tika/parser/mbox/MboxParser.java

Author: jukka
Date: Wed Apr 14 13:02:12 2010
New Revision: 933936

URL: http://svn.apache.org/viewvc?rev=933936&view=rev
Log:
TIKA-403: Refactor log library usage in tika-parsers

Remove direct log4j dependency as suggested by Attila Király

Modified:
    lucene/tika/trunk/tika-parsers/pom.xml
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java

Modified: lucene/tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=933936&r1=933935&r2=933936&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (original)
+++ lucene/tika/trunk/tika-parsers/pom.xml Wed Apr 14 13:02:12 2010
@@ -125,11 +125,6 @@
       <version>3.1</version>
     </dependency>
     <dependency>
-      <groupId>log4j</groupId>
-      <artifactId>log4j</artifactId>
-      <version>1.2.14</version>
-    </dependency>
-    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java?rev=933936&r1=933935&r2=933936&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java Wed Apr 14 13:02:12 2010
@@ -26,7 +26,6 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.log4j.Logger;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -42,8 +41,6 @@ import org.xml.sax.SAXException;
  */
 public class MboxParser implements Parser {
 
-    private static final Logger LOGGER = Logger.getLogger(MboxParser.class);
-
     private static final Set<MediaType> SUPPORTED_TYPES =
         Collections.singleton(MediaType.application("mbox"));
 
@@ -70,10 +67,9 @@ public class MboxParser implements Parse
         InputStreamReader isr;
         try {
             // Headers are going to be 7-bit ascii
-            isr = new InputStreamReader(stream, "us-ascii");
+            isr = new InputStreamReader(stream, "US-ASCII");
         } catch (UnsupportedEncodingException e) {
-            LOGGER.error("Unexpected exception setting up MboxParser", e);
-            isr = new InputStreamReader(stream);
+            throw new TikaException("US-ASCII is not supported!", e);
         }
 
         BufferedReader reader = new BufferedReader(isr);
@@ -186,8 +182,7 @@ public class MboxParser implements Parse
 
         Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
         if (!headerMatcher.matches()) {
-            LOGGER.warn("Malformed email header in mbox file: " + curLine);
-            return;
+            return; // ignore malformed header lines
         }
 
         String headerTag = headerMatcher.group(1).toLowerCase();