You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/02/01 18:42:50 UTC
svn commit: r905345 - in /lucene/tika/trunk:
tika-core/src/main/java/org/apache/tika/detect/
tika-core/src/main/java/org/apache/tika/language/
tika-core/src/main/java/org/apache/tika/metadata/
tika-core/src/main/java/org/apache/tika/mime/ tika-core/src...
Author: jukka
Date: Mon Feb 1 17:42:49 2010
New Revision: 905345
URL: http://svn.apache.org/viewvc?rev=905345&view=rev
Log:
TIKA-375: Improve code quality metrics
Various trivial improvements
Added:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java
- copied, changed from r905333, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html
Removed:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java Mon Feb 1 17:42:49 2010
@@ -121,7 +121,7 @@
try {
name = URLDecoder.decode(name, "UTF-8");
} catch (UnsupportedEncodingException e) {
- throw new AssertionError("UTF-8 not supported");
+ throw new IllegalStateException("UTF-8 not supported", e);
}
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java Mon Feb 1 17:42:49 2010
@@ -21,7 +21,6 @@
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.tika.sax.OfflineContentHandler;
@@ -31,7 +30,7 @@
import org.xml.sax.helpers.DefaultHandler;
/**
- * Utility class that uses a {@link SAXParser} to determine
+ * Utility class that uses a {@link javax.xml.parsers.SAXParser} to determine
* the namespace URI and local name of the root element of an XML file.
*
* @since Apache Tika 0.4
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java Mon Feb 1 17:42:49 2010
@@ -38,7 +38,7 @@
/**
* The available language profiles.
*/
- private static final Map<String, LanguageProfile> profiles =
+ private static final Map<String, LanguageProfile> PROFILES =
new HashMap<String, LanguageProfile>();
private static void addProfile(String language) {
@@ -64,7 +64,7 @@
stream.close();
}
- profiles.put(language, profile);
+ PROFILES.put(language, profile);
} catch (Throwable t) {
// Failed to load this language profile. Log the problem?
}
@@ -98,7 +98,7 @@
public LanguageIdentifier(LanguageProfile profile) {
String minLanguage = "unknown";
double minDistance = 1.0;
- for (Map.Entry<String, LanguageProfile> entry : profiles.entrySet()) {
+ for (Map.Entry<String, LanguageProfile> entry : PROFILES.entrySet()) {
double distance = profile.distance(entry.getValue());
if (distance < minDistance) {
minDistance = distance;
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MetadataHelper.java Mon Feb 1 17:42:49 2010
@@ -16,7 +16,6 @@
*/
package org.apache.tika.metadata;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
@@ -42,7 +41,7 @@
* @param url the URL of the resource to be read
* @param metadata where the resource metadata is stored
* @return resource content
- * @throws FileNotFoundException if the URL can not be accessed
+ * @throws IOException if the URL can not be accessed
*/
public static InputStream getInputStream(URL url, Metadata metadata)
throws IOException {
Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java (from r905333, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java&p1=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html&r1=905333&r2=905345&rev=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package.html (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/package-info.java Mon Feb 1 17:42:49 2010
@@ -1,22 +1,21 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<body>
-A Multi-valued Metadata container, and set
-of constant fields for Tika Metadata.
-</body>
-</html>
+/**
+ * Multi-valued metadata container, and set of constant metadata fields.
+ */
+package org.apache.tika.metadata;
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/HexCoDec.java Mon Feb 1 17:42:49 2010
@@ -23,7 +23,7 @@
*/
public class HexCoDec {
- private final static char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5',
+ private static final char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5',
'6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
/**
@@ -106,14 +106,15 @@
* Internal method to turn a hex char into a nibble.
*/
private static int hexCharToNibble(char ch) {
- if ((ch >= '0') && (ch <= '9'))
+ if ((ch >= '0') && (ch <= '9')) {
return ch - '0';
- else if ((ch >= 'a') && (ch <= 'f'))
+ } else if ((ch >= 'a') && (ch <= 'f')) {
return ch - 'a' + 10;
- else if ((ch >= 'A') && (ch <= 'F'))
+ } else if ((ch >= 'A') && (ch <= 'F')) {
return ch - 'A' + 10;
- else
+ } else {
throw new IllegalArgumentException("Not a hex char - '" + ch + "'");
+ }
}
}
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Feb 1 17:42:49 2010
@@ -55,17 +55,17 @@
/**
* Name of the {@link #rootMimeType root} type, application/octet-stream.
*/
- public final static String OCTET_STREAM = "application/octet-stream";
+ public static final String OCTET_STREAM = "application/octet-stream";
/**
* Name of the {@link #textMimeType text} type, text/plain.
*/
- public final static String PLAIN_TEXT = "text/plain";
+ public static final String PLAIN_TEXT = "text/plain";
/**
* Name of the {@link #xml xml} type, application/xml.
*/
- public final static String XML = "application/xml";
+ public static final String XML = "application/xml";
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java Mon Feb 1 17:42:49 2010
@@ -18,7 +18,6 @@
import java.io.IOException;
import java.io.InputStream;
-import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -57,9 +56,10 @@
/**
* The parse() method from Tika 0.4 and earlier. Please use the
- * {@link #parse(InputStream, ContentHandler, Metadata, Map)} method
- * instead in new code. Calls to this backwards compatibility method
- * are forwarded to the new parse() method with an empty parse context.
+ * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}
+ * method instead in new code. Calls to this backwards compatibility
+ * method are forwarded to the new parse() method with an empty parse
+ * context.
*
* @deprecated This method will be removed in Apache Tika 1.0.
*/
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java Mon Feb 1 17:42:49 2010
@@ -19,7 +19,6 @@
import java.io.IOException;
import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
import org.apache.tika.metadata.Metadata;
import org.apache.xmlbeans.XmlException;
import org.xml.sax.ContentHandler;
@@ -28,7 +27,7 @@
/**
* Interface implemented by all Tika OOXML extractors.
*
- * @see POIXMLTextExtractor
+ * @see org.apache.poi.POIXMLTextExtractor
*/
public interface OOXMLExtractor {
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Feb 1 17:42:49 2010
@@ -238,13 +238,8 @@
}
public boolean hasNext() {
- if (offset < data.length) {
- // Check for padding at the end
- if (data[offset] != 0) {
- return true;
- }
- }
- return false;
+ // Check for padding at the end
+ return offset < data.length && data[offset] != 0;
}
public RawTag next() {
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java Mon Feb 1 17:42:49 2010
@@ -47,7 +47,7 @@
super(handler);
}
- private final String mapOldNS(String ns) {
+ private String mapOldNS(String ns) {
if (ns != null && ns.startsWith(OLD_NS)) {
return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0";
} else {
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java Mon Feb 1 17:42:49 2010
@@ -282,11 +282,10 @@
}
// special handling of tabulators
- if (TEXT_NS.equals(namespaceURI)) {
- if ("tab-stop".equals(localName)
- || "tab".equals(localName)) {
- this.characters(TAB, 0, TAB.length);
- }
+ if (TEXT_NS.equals(namespaceURI)
+ && ("tab-stop".equals(localName)
+ || "tab".equals(localName))) {
+ this.characters(TAB, 0, TAB.length);
}
}
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Mon Feb 1 17:42:49 2010
@@ -17,7 +17,6 @@
package org.apache.tika.parser.pkg;
import java.io.IOException;
-import java.io.InputStream;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
@@ -35,11 +34,10 @@
/**
* Abstract base class for parsers that deal with package formats.
* Subclasses can call the
- * {@link #parseEntry(InputStream, XHTMLContentHandler, Metadata)}
- * method to parse the given package entry using the configured
- * entry parser. The entries will be written to the XHTML event stream
- * as &lt;div class="package-entry"&gt; elements that contain the
- * (optional) entry name as a &lt;h1&gt; element and the full
+ * {@link #parseArchive(ArchiveInputStream, ContentHandler, Metadata, ParseContext)}
+ * method to parse the package stream. Package entries will be written
+ * to the XHTML event stream as &lt;div class="package-entry"&gt; elements
+ * that contain the (optional) entry name as a &lt;h1&gt; element and the full
* structured body content of the parsed entry.
*/
public abstract class PackageParser extends DelegatingParser {
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Mon Feb 1 17:42:49 2010
@@ -26,8 +26,6 @@
import java.nio.charset.Charset;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.DublinCore;
-import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
@@ -40,17 +38,18 @@
* Plain text parser. The text encoding of the document stream is
* automatically detected based on the byte patterns found at the
* beginning of the stream. The input metadata key
- * {@link HttpHeaders#CONTENT_ENCODING} is used as an encoding hint
- * if the automatic encoding detection fails.
+ * {@link org.apache.tika.metadata.HttpHeaders#CONTENT_ENCODING} is used
+ * as an encoding hint if the automatic encoding detection fails.
* <p>
* This parser sets the following output metadata entries:
* <dl>
- * <dt>{@link HttpHeaders#CONTENT_TYPE}</dt>
+ * <dt>{@link org.apache.tika.metadata.HttpHeaders#CONTENT_TYPE}</dt>
* <dd><code>text/plain</code></dd>
- * <dt>{@link HttpHeaders#CONTENT_ENCODING}</dt>
+ * <dt>{@link org.apache.tika.metadata.HttpHeaders#CONTENT_ENCODING}</dt>
* <dd>The detected text encoding of the document.</dd>
* <dt>
- * {@link HttpHeaders#CONTENT_LANGUAGE} and {@link DublinCore#LANGUAGE}
+ * {@link org.apache.tika.metadata.HttpHeaders#CONTENT_LANGUAGE} and
+ * {@link org.apache.tika.metadata.DublinCore#LANGUAGE}
* </dt>
* <dd>
* The default language of the detected encoding. Only set if the
Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java?rev=905345&r1=905344&r2=905345&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java Mon Feb 1 17:42:49 2010
@@ -117,8 +117,7 @@
int size = input.readUnsignedShort();
byte[] chars = new byte[size];
input.readFully(chars);
- String value = new String(chars);
- return value;
+ return new String(chars);
}
private Object readAMFObject(DataInputStream input) throws IOException {