You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/02/01 18:42:50 UTC

svn commit: r905345 - in /lucene/tika/trunk: tika-core/src/main/java/org/apache/tika/detect/ tika-core/src/main/java/org/apache/tika/language/ tika-core/src/main/java/org/apache/tika/metadata/ tika-core/src/main/java/org/apache/tika/mime/ tika-core/src...

Author: jukka
Date: Mon Feb  1 17:42:49 2010
New Revision: 905345

URL: http://svn.apache.org/viewvc?rev=905345&view=rev
Log:
TIKA-375: Improve code quality metrics

Various trivial improvements

Added:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java
      - copied, changed from r905333, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html
Removed:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html
Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java Mon Feb  1 17:42:49 2010
@@ -121,7 +121,7 @@
                 try {
                     name = URLDecoder.decode(name, "UTF-8");
                 } catch (UnsupportedEncodingException e) {
-                    throw new AssertionError("UTF-8 not supported");
+                    throw new IllegalStateException("UTF-8 not supported", e);
                 }
             }
 

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java Mon Feb  1 17:42:49 2010
@@ -21,7 +21,6 @@
 import javax.xml.XMLConstants;
 import javax.xml.namespace.QName;
 import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 
 import org.apache.tika.sax.OfflineContentHandler;
@@ -31,7 +30,7 @@
 import org.xml.sax.helpers.DefaultHandler;
 
 /**
- * Utility class that uses a {@link SAXParser} to determine
+ * Utility class that uses a {@link javax.xml.parsers.SAXParser} to determine
  * the namespace URI and local name of the root element of an XML file.
  *
  * @since Apache Tika 0.4

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java Mon Feb  1 17:42:49 2010
@@ -38,7 +38,7 @@
     /**
      * The available language profiles.
      */
-    private static final Map<String, LanguageProfile> profiles =
+    private static final Map<String, LanguageProfile> PROFILES =
         new HashMap<String, LanguageProfile>();
 
     private static void addProfile(String language) {
@@ -64,7 +64,7 @@
                 stream.close();
             }
 
-            profiles.put(language, profile);
+            PROFILES.put(language, profile);
         } catch (Throwable t) {
             // Failed to load this language profile. Log the problem?
         }
@@ -98,7 +98,7 @@
     public LanguageIdentifier(LanguageProfile profile) {
         String minLanguage = "unknown";
         double minDistance = 1.0;
-        for (Map.Entry<String, LanguageProfile> entry : profiles.entrySet()) {
+        for (Map.Entry<String, LanguageProfile> entry : PROFILES.entrySet()) {
             double distance = profile.distance(entry.getValue());
             if (distance < minDistance) {
                 minDistance = distance;

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java Mon Feb  1 17:42:49 2010
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.metadata;
 
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
@@ -42,7 +41,7 @@
      * @param url the URL of the resource to be read
      * @param metadata where the resource metadata is stored
      * @return resource content
-     * @throws FileNotFoundException if the URL can not be accessed
+     * @throws IOException if the URL can not be accessed
      */
     public static InputStream getInputStream(URL url, Metadata metadata)
             throws IOException {

Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java (from r905333, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java&p1=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html&r1=905333&r2=905345&rev=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java Mon Feb  1 17:42:49 2010
@@ -1,22 +1,21 @@
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-<html>
-<body>
-A Multi-valued Metadata container, and set
-of constant fields for Tika Metadata.
-</body>
-</html>
+/**
+ * Multi-valued metadata container, and set of constant metadata fields.
+ */
+package org.apache.tika.metadata;

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java Mon Feb  1 17:42:49 2010
@@ -23,7 +23,7 @@
  */
 public class HexCoDec {
 
-    private final static char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5',
+    private static final char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5',
             '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
     /**
@@ -106,14 +106,15 @@
      * Internal method to turn a hex char into a nibble.
      */
     private static int hexCharToNibble(char ch) {
-        if ((ch >= '0') && (ch <= '9'))
+        if ((ch >= '0') && (ch <= '9')) {
             return ch - '0';
-        else if ((ch >= 'a') && (ch <= 'f'))
+        } else if ((ch >= 'a') && (ch <= 'f')) {
             return ch - 'a' + 10;
-        else if ((ch >= 'A') && (ch <= 'F'))
+        } else if ((ch >= 'A') && (ch <= 'F')) {
             return ch - 'A' + 10;
-        else
+        } else {
             throw new IllegalArgumentException("Not a hex char - '" + ch + "'");
+        }
     }
 
 }

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Feb  1 17:42:49 2010
@@ -55,17 +55,17 @@
     /**
      * Name of the {@link #rootMimeType root} type, application/octet-stream.
      */
-    public final static String OCTET_STREAM = "application/octet-stream";
+    public static final String OCTET_STREAM = "application/octet-stream";
 
     /**
      * Name of the {@link #textMimeType text} type, text/plain.
      */
-    public final static String PLAIN_TEXT = "text/plain";
+    public static final String PLAIN_TEXT = "text/plain";
     
     /**
      * Name of the {@link #xml xml} type, application/xml.
      */
-    public final static String XML = "application/xml";
+    public static final String XML = "application/xml";
 
 
     

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java Mon Feb  1 17:42:49 2010
@@ -18,7 +18,6 @@
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Map;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -57,9 +56,10 @@
 
     /**
      * The parse() method from Tika 0.4 and earlier. Please use the
-     * {@link #parse(InputStream, ContentHandler, Metadata, Map)} method
-     * instead in new code. Calls to this backwards compatibility method
-     * are forwarded to the new parse() method with an empty parse context.
+     * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}
+     * method instead in new code. Calls to this backwards compatibility
+     * method are forwarded to the new parse() method with an empty parse
+     * context.
      *
      * @deprecated This method will be removed in Apache Tika 1.0.
      */

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java Mon Feb  1 17:42:49 2010
@@ -19,7 +19,6 @@
 import java.io.IOException;
 
 import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
 import org.apache.tika.metadata.Metadata;
 import org.apache.xmlbeans.XmlException;
 import org.xml.sax.ContentHandler;
@@ -28,7 +27,7 @@
 /**
  * Interface implemented by all Tika OOXML extractors.
  * 
- * @see POIXMLTextExtractor
+ * @see org.apache.poi.POIXMLTextExtractor
  */
 public interface OOXMLExtractor {
 

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Feb  1 17:42:49 2010
@@ -238,13 +238,8 @@
         }
 
         public boolean hasNext() {
-            if (offset < data.length) {
-                // Check for padding at the end
-                if (data[offset] != 0) {
-                    return true;
-                }
-            }
-            return false;
+            // Check for padding at the end
+            return offset < data.length && data[offset] != 0;
         }
 
         public RawTag next() {

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java Mon Feb  1 17:42:49 2010
@@ -47,7 +47,7 @@
         super(handler);
     }
 
-    private final String mapOldNS(String ns) {
+    private String mapOldNS(String ns) {
         if (ns != null && ns.startsWith(OLD_NS)) {
             return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0";
         } else {

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java Mon Feb  1 17:42:49 2010
@@ -282,11 +282,10 @@
                     }
 
                     // special handling of tabulators
-                    if (TEXT_NS.equals(namespaceURI)) {
-                        if ("tab-stop".equals(localName)
-                                || "tab".equals(localName)) {
-                            this.characters(TAB, 0, TAB.length);
-                        }
+                    if (TEXT_NS.equals(namespaceURI)
+                            && ("tab-stop".equals(localName)
+                                    || "tab".equals(localName))) {
+                        this.characters(TAB, 0, TAB.length);
                     }
                 }
 

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Mon Feb  1 17:42:49 2010
@@ -17,7 +17,6 @@
 package org.apache.tika.parser.pkg;
 
 import java.io.IOException;
-import java.io.InputStream;
 
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.ArchiveInputStream;
@@ -35,11 +34,10 @@
 /**
  * Abstract base class for parsers that deal with package formats.
  * Subclasses can call the
- * {@link #parseEntry(InputStream, XHTMLContentHandler, Metadata)}
- * method to parse the given package entry using the configured
- * entry parser. The entries will be written to the XHTML event stream
- * as &lt;div class="package-entry"&gt; elements that contain the
- * (optional) entry name as a &lt;h1&gt; element and the full
+ * {@link #parseArchive(ArchiveInputStream, ContentHandler, Metadata, ParseContext)}
+ * method to parse the package stream. Package entries will be written
+ * to the XHTML event stream as &lt;div class="package-entry"&gt; elements
+ * that contain the (optional) entry name as a &lt;h1&gt; element and the full
  * structured body content of the parsed entry.
  */
 public abstract class PackageParser extends DelegatingParser {

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Mon Feb  1 17:42:49 2010
@@ -26,8 +26,6 @@
 import java.nio.charset.Charset;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.DublinCore;
-import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -40,17 +38,18 @@
  * Plain text parser. The text encoding of the document stream is
  * automatically detected based on the byte patterns found at the
  * beginning of the stream. The input metadata key
- * {@link HttpHeaders#CONTENT_ENCODING} is used as an encoding hint
- * if the automatic encoding detection fails.
+ * {@link org.apache.tika.metadata.HttpHeaders#CONTENT_ENCODING} is used
+ * as an encoding hint if the automatic encoding detection fails.
  * <p>
  * This parser sets the following output metadata entries:
  * <dl>
- *   <dt>{@link HttpHeaders#CONTENT_TYPE}</dt>
+ *   <dt>{@link org.apache.tika.metadata.HttpHeaders#CONTENT_TYPE}</dt>
  *   <dd><code>text/plain</code></dd>
- *   <dt>{@link HttpHeaders#CONTENT_ENCODING}</dt>
+ *   <dt>{@link org.apache.tika.metadata.HttpHeaders#CONTENT_ENCODING}</dt>
  *   <dd>The detected text encoding of the document.</dd>
  *   <dt>
- *     {@link HttpHeaders#CONTENT_LANGUAGE} and {@link DublinCore#LANGUAGE}
+ *     {@link org.apache.tika.metadata.HttpHeaders#CONTENT_LANGUAGE} and
+ *     {@link org.apache.tika.metadata.DublinCore#LANGUAGE}
  *   </dt>
  *   <dd>
  *     The default language of the detected encoding. Only set if the

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java Mon Feb  1 17:42:49 2010
@@ -117,8 +117,7 @@
         int size = input.readUnsignedShort();
         byte[] chars = new byte[size];
         input.readFully(chars);
-        String value = new String(chars);
-        return value;
+        return new String(chars);
     }
 
     private Object readAMFObject(DataInputStream input) throws IOException {