You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/20 11:51:45 UTC
svn commit: r1696745 [1/2] - in
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser: audio/
chm/accessor/ chm/core/ code/ crypto/ ctakes/ dif/ envi/ epub/ feed/ gdal/
geo/topic/ hdf/ html/ image/ image/xmp/ iptc/ isatab/ iwork/ jdbc/ mail/
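mat/ mbox/ microsoft/ microsoft/ooxml/ mp3/ ocr/ odf/ pdf/ pkg/ prt/ rtf/
strings/ txt/ utils/ video/ xml/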
Author: nick
Date: Thu Aug 20 09:51:44 2015
New Revision: 1696745
URL: http://svn.apache.org/r1696745
Log:
TIKA-1710 patch from Yaniv Kunda - Use Commons IO instead of the Tika Core IO copies, and java.nio.charset.StandardCharsets
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java Thu Aug 20 09:51:44 2015
@@ -41,6 +41,8 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
public class MidiParser extends AbstractParser {
/** Serial version UID */
@@ -101,7 +103,7 @@ public class MidiParser extends Abstract
if (meta.getType() >= 1 && meta.getType() <= 15) {
// FIXME: What's the encoding?
xhtml.characters(
- new String(meta.getData(), "ISO-8859-1"));
+ new String(meta.getData(), ISO_8859_1));
}
}
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java Thu Aug 20 09:51:44 2015
@@ -20,11 +20,12 @@ import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.core.ChmCommons;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Holds chm listing entries
*/
@@ -234,7 +235,7 @@ public class ChmDirectoryListingSet {
dle.setNameLength(strlen);
dle.setName(new String(ChmCommons.copyOfRange(
dir_chunk, placeHolder,
- (placeHolder + dle.getNameLength())), IOUtils.UTF_8));
+ (placeHolder + dle.getNameLength())), UTF_8));
checkControlData(dle);
checkResetTable(dle);
@@ -255,9 +256,9 @@ public class ChmDirectoryListingSet {
}
// int indexWorkData = ChmCommons.indexOf(dir_chunk,
-// "::".getBytes("UTF-8"));
+// "::".getBytes(UTF_8));
// int indexUserData = ChmCommons.indexOf(dir_chunk,
-// "/".getBytes("UTF-8"));
+// "/".getBytes(UTF_8));
//
// if (indexUserData>=0 && indexUserData < indexWorkData)
// setPlaceHolder(indexUserData);
@@ -285,7 +286,7 @@ public class ChmDirectoryListingSet {
// // dle.getNameLength()))));
// dle.setName(new String(ChmCommons.copyOfRange(
// dir_chunk, placeHolder,
-// (placeHolder + dle.getNameLength())), "UTF-8"));
+// (placeHolder + dle.getNameLength())), UTF_8));
// checkControlData(dle);
// checkResetTable(dle);
// setPlaceHolder(placeHolder
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java Thu Aug 20 09:51:44 2015
@@ -19,11 +19,12 @@ package org.apache.tika.parser.chm.acces
import java.math.BigInteger;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.assertion.ChmAssert;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* The Header 0000: char[4] 'ITSF' 0004: DWORD 3 (Version number) 0008: DWORD
* Total header length, including header section table and following data. 000C:
@@ -62,7 +63,7 @@ public class ChmItsfHeader implements Ch
private int currentPlace = 0;
public ChmItsfHeader() {
- signature = ChmConstants.ITSF.getBytes(IOUtils.UTF_8); /* 0 (ITSF) */
+ signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */
}
/**
@@ -70,7 +71,7 @@ public class ChmItsfHeader implements Ch
*/
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append(new String(getSignature(), IOUtils.UTF_8) + " ");
+ sb.append(new String(getSignature(), UTF_8) + " ");
sb.append(getVersion() + " ");
sb.append(getHeaderLen() + " ");
sb.append(getUnknown_000c() + " ");
@@ -463,7 +464,7 @@ public class ChmItsfHeader implements Ch
chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));
- if (!new String(chmItsfHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.ITSF))
+ if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF))
throw new TikaException("seems not valid file");
if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java Thu Aug 20 09:51:44 2015
@@ -16,15 +16,14 @@
*/
package org.apache.tika.parser.chm.accessor;
-import java.io.UnsupportedEncodingException;
-
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.assertion.ChmAssert;
import org.apache.tika.parser.chm.core.ChmCommons;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Directory header The directory starts with a header; its format is as
* follows: 0000: char[4] 'ITSP' 0004: DWORD Version number 1 0008: DWORD Length
@@ -69,16 +68,16 @@ public class ChmItspHeader implements Ch
private int currentPlace = 0;
public ChmItspHeader() {
- signature = ChmConstants.ITSP.getBytes(IOUtils.UTF_8); /*
- * 0
- * (ITSP
- * )
- */
+ signature = ChmConstants.ITSP.getBytes(UTF_8); /*
+ * 0
+ * (ITSP
+ * )
+ */
}
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("[ signature:=" + new String(getSignature(), IOUtils.UTF_8)
+ sb.append("[ signature:=" + new String(getSignature(), UTF_8)
+ System.getProperty("line.separator"));
sb.append("version:=\t" + getVersion()
+ System.getProperty("line.separator"));
@@ -537,7 +536,7 @@ public class ChmItspHeader implements Ch
ChmConstants.BYTE_ARRAY_LENGHT));
/* Checks validity of the itsp header */
- if (!new String(chmItspHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.ITSP))
+ if (!new String(chmItspHeader.getSignature(), UTF_8).equals(ChmConstants.ITSP))
throw new ChmParsingException("seems not valid signature");
if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java Thu Aug 20 09:51:44 2015
@@ -16,14 +16,13 @@
*/
package org.apache.tika.parser.chm.accessor;
-import java.io.UnsupportedEncodingException;
-
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.assertion.ChmAssert;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
*
* ::DataSpace/Storage/<SectionName>/ControlData This file contains $20 bytes of
@@ -55,11 +54,11 @@ public class ChmLzxcControlData implemen
private int currentPlace = 0;
public ChmLzxcControlData() {
- signature = ChmConstants.LZXC.getBytes(IOUtils.UTF_8); /*
- * 4
- * (LZXC
- * )
- */
+ signature = ChmConstants.LZXC.getBytes(UTF_8); /*
+ * 4
+ * (LZXC
+ * )
+ */
}
/**
@@ -255,7 +254,7 @@ public class ChmLzxcControlData implemen
StringBuilder sb = new StringBuilder();
sb.append("size(unknown):=" + this.getSize() + ", ");
sb.append("signature(Compression type identifier):="
- + new String(this.getSignature(), IOUtils.UTF_8) + ", ");
+ + new String(this.getSignature(), UTF_8) + ", ");
sb.append("version(Possibly numeric code for LZX):="
+ this.getVersion() + System.getProperty("line.separator"));
sb.append("resetInterval(The Huffman reset interval):="
@@ -306,7 +305,7 @@ public class ChmLzxcControlData implemen
"window size / resetInterval should be more than 1");
/* checks a signature */
- if (!new String(chmLzxcControlData.getSignature(), IOUtils.UTF_8)
+ if (!new String(chmLzxcControlData.getSignature(), UTF_8)
.equals(ChmConstants.LZXC))
throw new ChmParsingException(
"the signature does not seem to be correct");
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java Thu Aug 20 09:51:44 2015
@@ -19,12 +19,13 @@ package org.apache.tika.parser.chm.acces
import java.util.Arrays;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.assertion.ChmAssert;
import org.apache.tika.parser.chm.core.ChmCommons;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Description Note: not always exists An index chunk has the following format:
* 0000: char[4] 'PMGI' 0004: DWORD Length of quickref/free area at end of
@@ -54,7 +55,7 @@ public class ChmPmgiHeader implements Ch
private int currentPlace = 0;
public ChmPmgiHeader() {
- signature = ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8); /* 0 (PMGI) */
+ signature = ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8); /* 0 (PMGI) */
}
private int getDataRemained() {
@@ -81,7 +82,7 @@ public class ChmPmgiHeader implements Ch
ChmAssert.assertPositiveInt(count);
this.setDataRemained(data.length);
index = ChmCommons.indexOf(data,
- ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8));
+ ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8));
if (index >= 0)
System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, count);
@@ -149,7 +150,7 @@ public class ChmPmgiHeader implements Ch
*/
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("signature:=" + new String(getSignature(), IOUtils.UTF_8) + ", ");
+ sb.append("signature:=" + new String(getSignature(), UTF_8) + ", ");
sb.append("free space:=" + getFreeSpace()
+ System.getProperty("line.separator"));
return sb.toString();
@@ -167,7 +168,7 @@ public class ChmPmgiHeader implements Ch
/* check structure */
if (!Arrays.equals(chmPmgiHeader.getSignature(),
- ChmConstants.CHM_PMGI_MARKER.getBytes(IOUtils.UTF_8)))
+ ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8)))
throw new TikaException(
"it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted");
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java Thu Aug 20 09:51:44 2015
@@ -17,11 +17,12 @@
package org.apache.tika.parser.chm.accessor;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.assertion.ChmAssert;
import org.apache.tika.parser.chm.core.ChmConstants;
import org.apache.tika.parser.chm.exception.ChmParsingException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Description There are two types of directory chunks -- index chunks, and
* listing chunks. The index chunk will be omitted if there is only one listing
@@ -67,11 +68,11 @@ public class ChmPmglHeader implements Ch
private int currentPlace = 0;
public ChmPmglHeader() {
- signature = ChmConstants.PMGL.getBytes(IOUtils.UTF_8); /*
- * 0
- * (PMGL
- * )
- */
+ signature = ChmConstants.PMGL.getBytes(UTF_8); /*
+ * 0
+ * (PMGL
+ * )
+ */
}
private int getDataRemained() {
@@ -103,7 +104,7 @@ public class ChmPmglHeader implements Ch
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("signatute:=" + new String(getSignature(), IOUtils.UTF_8) + ", ");
+ sb.append("signatute:=" + new String(getSignature(), UTF_8) + ", ");
sb.append("free space:=" + getFreeSpace() + ", ");
sb.append("unknown0008:=" + getUnknown0008() + ", ");
sb.append("prev block:=" + getBlockPrev() + ", ");
@@ -166,7 +167,7 @@ public class ChmPmglHeader implements Ch
chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data));
/* check structure */
- if (!new String(chmPmglHeader.getSignature(), IOUtils.UTF_8).equals(ChmConstants.PMGL))
+ if (!new String(chmPmglHeader.getSignature(), UTF_8).equals(ChmConstants.PMGL))
throw new ChmParsingException(ChmPmglHeader.class.getName()
+ " pmgl != pmgl.signature");
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java Thu Aug 20 09:51:44 2015
@@ -16,14 +16,14 @@
*/
package org.apache.tika.parser.chm.core;
-import org.apache.tika.io.IOUtils;
+import static java.nio.charset.StandardCharsets.UTF_8;
public class ChmConstants {
/* Prevents instantiation */
private ChmConstants() {
}
- public static final String DEFAULT_CHARSET = IOUtils.UTF_8.name();
+ public static final String DEFAULT_CHARSET = UTF_8.name();
public static final String ITSF = "ITSF";
public static final String ITSP = "ITSP";
public static final String PMGL = "PMGL";
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java Thu Aug 20 09:51:44 2015
@@ -21,8 +21,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
+
+import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
import org.apache.tika.parser.chm.accessor.ChmItspHeader;
@@ -34,6 +35,8 @@ import org.apache.tika.parser.chm.core.C
import org.apache.tika.parser.chm.lzx.ChmBlockInfo;
import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Extracts text from chm file. Enumerates chm entries.
*/
@@ -172,7 +175,7 @@ public class ChmExtractor {
int indexOfControlData = getChmDirList().getControlDataIndex();
int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
- ChmConstants.LZXC.getBytes(IOUtils.UTF_8));
+ ChmConstants.LZXC.getBytes(UTF_8));
byte[] dir_chunk = null;
if (indexOfResetData > 0)
dir_chunk = ChmCommons.copyOfRange( getData(), indexOfResetData, indexOfResetData
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java Thu Aug 20 09:51:44 2015
@@ -30,10 +30,10 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java Thu Aug 20 09:51:44 2015
@@ -20,8 +20,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java Thu Aug 20 09:51:44 2015
@@ -22,7 +22,7 @@ import java.io.OutputStream;
import java.io.Serializable;
import java.util.Properties;
-import org.apache.tika.io.NullOutputStream;
+import static org.apache.commons.io.output.NullOutputStream.NULL_OUTPUT_STREAM;
/**
* Configuration for {@see CTAKESContentHandler}.
@@ -51,7 +51,7 @@ public class CTAKESConfig implements Ser
private CTAKESSerializer serializerType = CTAKESSerializer.XMI;
// OutputStream object used for CAS serialization
- private OutputStream stream = NullOutputStream.NULL_OUTPUT_STREAM;
+ private OutputStream stream = NULL_OUTPUT_STREAM;
// Enables CAS serialization
private boolean serialize = false;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dif/DIFParser.java Thu Aug 20 09:51:44 2015
@@ -23,13 +23,12 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.dif.DIFContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.sax.TaggedContentHandler;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java Thu Aug 20 09:51:44 2015
@@ -23,8 +23,8 @@ import java.util.Collections;
import java.util.Set;
import java.nio.charset.Charset;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.detect.AutoDetectReader;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubContentParser.java Thu Aug 20 09:51:44 2015
@@ -26,8 +26,8 @@ import javax.xml.parsers.ParserConfigura
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java Thu Aug 20 09:51:44 2015
@@ -25,8 +25,8 @@ import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
@@ -40,6 +40,8 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Epub parser
*/
@@ -93,7 +95,7 @@ public class EpubParser extends Abstract
ZipEntry entry = zip.getNextEntry();
while (entry != null) {
if (entry.getName().equals("mimetype")) {
- String type = IOUtils.toString(zip, IOUtils.UTF_8.name());
+ String type = IOUtils.toString(zip, UTF_8);
metadata.set(Metadata.CONTENT_TYPE, type);
} else if (entry.getName().equals("metadata.xml")) {
meta.parse(zip, new DefaultHandler(), metadata, context);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java Thu Aug 20 09:51:44 2015
@@ -23,8 +23,8 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java Thu Aug 20 09:51:44 2015
@@ -31,7 +31,6 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -43,6 +42,7 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN;
//Tika imports
@@ -384,7 +384,7 @@ public class GDALParser extends Abstract
private String extractOutput(InputStream stream) throws SAXException,
IOException {
StringBuilder sb = new StringBuilder();
- Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
+ Reader reader = new InputStreamReader(stream, UTF_8);
try {
char[] buffer = new char[1024];
for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
@@ -399,8 +399,8 @@ public class GDALParser extends Abstract
private void processOutput(ContentHandler handler, Metadata metadata,
String output) throws SAXException, IOException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- InputStream stream = new ByteArrayInputStream(output.getBytes(IOUtils.UTF_8));
- Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
+ InputStream stream = new ByteArrayInputStream(output.getBytes(UTF_8));
+ Reader reader = new InputStreamReader(stream, UTF_8);
try {
xhtml.startDocument();
xhtml.startElement("p");
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java Thu Aug 20 09:51:44 2015
@@ -35,6 +35,8 @@ import opennlp.tools.util.Span;
import org.apache.commons.io.IOUtils;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class NameEntityExtractor {
private String nerModelPath = null;
ArrayList<String> locationNameEntities;
@@ -63,7 +65,7 @@ public class NameEntityExtractor {
InputStream modelIn = new FileInputStream(nerModelPath);
TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
NameFinderME nameFinder = new NameFinderME(model);
- String[] in = IOUtils.toString(stream, "UTF-8").split(" ");
+ String[] in = IOUtils.toString(stream, UTF_8).split(" ");
Span nameE[] = nameFinder.find(in);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java Thu Aug 20 09:51:44 2015
@@ -24,8 +24,8 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Thu Aug 20 09:51:44 2015
@@ -24,10 +24,10 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Thu Aug 20 09:51:44 2015
@@ -29,8 +29,8 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java Thu Aug 20 09:51:44 2015
@@ -18,7 +18,6 @@ package org.apache.tika.parser.image;
import java.io.IOException;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
@@ -38,6 +37,8 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.US_ASCII;
+
/**
* Parser for the Adobe Photoshop PSD File Format.
* <p/>
@@ -193,11 +194,7 @@ public class PSDParser extends AbstractP
private String getDataAsString() {
// Will be null padded
- try {
- return new String(data, 0, data.length - 1, "ASCII");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("Something is very broken in your JVM!");
- }
+ return new String(data, 0, data.length - 1, US_ASCII);
}
}
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java Thu Aug 20 09:51:44 2015
@@ -27,15 +27,16 @@ import java.util.List;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.jempbox.xmp.XMPSchemaDublinCore;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.xml.sax.InputSource;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class JempboxExtractor {
// The XMP spec says it must be unicode, but for most file formats it specifies "must be encoded in UTF-8"
- private static final String DEFAULT_XMP_CHARSET = IOUtils.UTF_8.name();
+ private static final String DEFAULT_XMP_CHARSET = UTF_8.name();
private XMPPacketScanner scanner = new XMPPacketScanner();
private Metadata metadata;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java Thu Aug 20 09:51:44 2015
@@ -22,7 +22,8 @@ package org.apache.tika.parser.image.xmp
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
/**
* This class is a parser for XMP packets. By default, it tries to locate the first XMP packet
@@ -41,13 +42,9 @@ public class XMPPacketScanner {
private static final byte[] PACKET_TRAILER;
static {
- try {
- PACKET_HEADER = "<?xpacket begin=".getBytes("US-ASCII");
- PACKET_HEADER_END = "?>".getBytes("US-ASCII");
- PACKET_TRAILER = "<?xpacket".getBytes("US-ASCII");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("Incompatible JVM! US-ASCII encoding not supported.");
- }
+ PACKET_HEADER = "<?xpacket begin=".getBytes(US_ASCII);
+ PACKET_HEADER_END = "?>".getBytes(US_ASCII);
+ PACKET_TRAILER = "<?xpacket".getBytes(US_ASCII);
}
private static boolean skipAfter(InputStream in, byte[] match) throws IOException {
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java Thu Aug 20 09:51:44 2015
@@ -28,7 +28,6 @@ import java.util.Set;
import java.util.TimeZone;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
@@ -38,6 +37,8 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Parser for IPTC ANPA New Wire Feeds
*/
@@ -162,7 +163,7 @@ public class IptcAnpaParser implements P
}
int msgsize = is.read(buf); // read in at least the full data
- String message = (new String(buf, IOUtils.UTF_8)).toLowerCase(Locale.ROOT);
+ String message = (new String(buf, UTF_8)).toLowerCase(Locale.ROOT);
// these are not if-then-else, because we want to go from most common
// and fall through to least. this is imperfect, as these tags could
// show up in other agency stories, but i can't find a spec or any
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/isatab/ISATabUtils.java Thu Aug 20 09:51:44 2015
@@ -28,10 +28,10 @@ import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java Thu Aug 20 09:51:44 2015
@@ -30,9 +30,9 @@ import org.apache.commons.compress.archi
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.detect.XmlRootExtractor;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java Thu Aug 20 09:51:44 2015
@@ -24,10 +24,10 @@ import java.sql.SQLException;
import java.util.List;
import java.util.Set;
+import org.apache.commons.io.IOExceptionWithCause;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.IOExceptionWithCause;
import org.apache.tika.metadata.Database;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java Thu Aug 20 09:51:44 2015
@@ -31,12 +31,12 @@ import java.sql.Types;
import java.util.LinkedList;
import java.util.List;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.io.FilenameUtils;
-import org.apache.tika.io.IOExceptionWithCause;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Database;
import org.apache.tika.metadata.Metadata;
@@ -52,6 +52,8 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* General base class to iterate through rows of a JDBC table
*/
@@ -185,7 +187,7 @@ class JDBCTableReader {
//is there a more efficient way to go from a Reader to an InputStream?
String s = clob.getSubString(0, readSize);
EmbeddedDocumentExtractor ex = AbstractDBParser.getEmbeddedDocumentExtractor(context);
- ex.parseEmbedded(new ByteArrayInputStream(s.getBytes("UTF-8")), handler, m, true);
+ ex.parseEmbedded(new ByteArrayInputStream(s.getBytes(UTF_8)), handler, m, true);
}
protected void handleBlob(String tableName, String columnName, int rowNum, ResultSet resultSet, int columnIndex,
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java Thu Aug 20 09:51:44 2015
@@ -27,7 +27,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Set;
-import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.commons.io.IOExceptionWithCause;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java Thu Aug 20 09:51:44 2015
@@ -21,11 +21,11 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
+import org.apache.commons.io.input.TaggedInputStream;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.parser.MimeStreamParser;
import org.apache.james.mime4j.stream.MimeConfig;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TaggedInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
@@ -73,7 +73,9 @@ public class RFC822Parser extends Abstra
xhtml, metadata, context, config.isStrictParsing());
parser.setContentHandler(mch);
parser.setContentDecoding(true);
- TaggedInputStream tagged = TaggedInputStream.get(stream);
+ TaggedInputStream tagged = stream instanceof TaggedInputStream
+ ? (TaggedInputStream)stream
+ : new TaggedInputStream(stream);
try {
parser.parse(tagged);
} catch (IOException e) {
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java Thu Aug 20 09:51:44 2015
@@ -24,7 +24,6 @@ import java.util.Set;
import java.util.Map;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AbstractParser;
@@ -40,6 +39,8 @@ import com.jmatio.io.MatFileReader;
import com.jmatio.types.MLArray;
import com.jmatio.types.MLStructure;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class MatParser extends AbstractParser {
@@ -87,7 +88,7 @@ public class MatParser extends AbstractP
}
// Get endian indicator from header file
- String endianBytes = new String(hdr.getEndianIndicator(), IOUtils.UTF_8); // Retrieve endian bytes and convert to string
+ String endianBytes = new String(hdr.getEndianIndicator(), UTF_8); // Retrieve endian bytes and convert to string
String endianCode = String.valueOf(endianBytes.toCharArray()); // Convert bytes to characters to string
metadata.set("endian", endianCode);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java Thu Aug 20 09:51:44 2015
@@ -17,6 +17,7 @@
package org.apache.tika.parser.mbox;
import static java.lang.String.valueOf;
+import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Collections.singleton;
import java.io.ByteArrayInputStream;
@@ -32,7 +33,6 @@ import com.pff.PSTMessage;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -153,7 +153,7 @@ public class OutlookPSTParser extends Ab
mailMetadata.set("priority", valueOf(pstMail.getPriority()));
mailMetadata.set("flagged", valueOf(pstMail.isFlagged()));
- byte[] mailContent = pstMail.getBody().getBytes(IOUtils.UTF_8);
+ byte[] mailContent = pstMail.getBody().getBytes(UTF_8);
embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true);
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java Thu Aug 20 09:51:44 2015
@@ -40,7 +40,6 @@ import com.healthmarketscience.jackcess.
import com.healthmarketscience.jackcess.util.OleBlob;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
@@ -51,6 +50,8 @@ import org.apache.tika.sax.BodyContentHa
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Internal class. Needs to be instantiated for each parse because of
* the lack of thread safety with the dateTimeFormatter
@@ -198,7 +199,7 @@ class JackcessExtractor extends Abstract
Metadata m = new Metadata();
m.set(Metadata.CONTENT_TYPE, "text/html; charset=UTF-8");
try {
- htmlParser.parse(new ByteArrayInputStream(v.getBytes(IOUtils.UTF_8)),
+ htmlParser.parse(new ByteArrayInputStream(v.getBytes(UTF_8)),
h,
m, EMPTY_PARSE_CONTEXT);
handler.characters(h.toString());
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Thu Aug 20 09:51:44 2015
@@ -25,6 +25,7 @@ import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.poifs.crypt.Decryptor;
@@ -36,7 +37,6 @@ import org.apache.poi.poifs.filesystem.N
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java Thu Aug 20 09:51:44 2015
@@ -45,7 +45,6 @@ import org.apache.poi.poifs.filesystem.D
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.util.CodePageUtil;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -61,6 +60,8 @@ import org.apache.tika.sax.EmbeddedConte
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Outlook Message Parser.
*/
@@ -341,7 +342,7 @@ public class OutlookExtractor extends Ab
Charset charset = null;
try {
charset = detector.detect(new ByteArrayInputStream(
- html.getBytes(IOUtils.UTF_8)), EMPTY_METADATA);
+ html.getBytes(UTF_8)), EMPTY_METADATA);
} catch (IOException e) {
//swallow
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Thu Aug 20 09:51:44 2015
@@ -26,6 +26,7 @@ import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
+import org.apache.commons.io.IOUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
@@ -33,7 +34,6 @@ import org.apache.poi.poifs.filesystem.D
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.detect.Detector;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Thu Aug 20 09:51:44 2015
@@ -47,13 +47,14 @@ import org.apache.poi.poifs.filesystem.D
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class WordExtractor extends AbstractPOIFSExtractor {
private static final char UNICODECHAR_NONBREAKING_HYPHEN = '\u2011';
@@ -298,7 +299,7 @@ public class WordExtractor extends Abstr
CharacterRun cr = p.getCharacterRun(j);
// FIELD_BEGIN_MARK:
- if (cr.text().getBytes(IOUtils.UTF_8)[0] == 0x13) {
+ if (cr.text().getBytes(UTF_8)[0] == 0x13) {
Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset());
// 58 is an embedded document
// 56 is a document link
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java Thu Aug 20 09:51:44 2015
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
@@ -33,7 +34,6 @@ import org.apache.poi.xssf.extractor.XSS
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java Thu Aug 20 09:51:44 2015
@@ -18,7 +18,6 @@ package org.apache.tika.parser.mp3;
import java.io.IOException;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;
@@ -26,6 +25,8 @@ import org.apache.tika.exception.TikaExc
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
/**
* This is used to parse ID3 Version 1 Tag information from an MP3 file,
* if available.
@@ -177,10 +178,6 @@ public class ID3v1Handler implements ID3
}
// Return the remaining substring
- try {
- return new String(buffer, start, end - start, "ISO-8859-1");
- } catch (UnsupportedEncodingException e) {
- throw new TikaException("ISO-8859-1 encoding is not available", e);
- }
+ return new String(buffer, start, end - start, ISO_8859_1);
}
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Thu Aug 20 09:51:44 2015
@@ -24,6 +24,8 @@ import java.util.Iterator;
import org.apache.tika.parser.mp3.ID3Tags.ID3Comment;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
/**
* A frame of ID3v2 data, which is then passed to a handler to
* be turned into useful data.
@@ -331,12 +333,7 @@ public class ID3v2Frame implements MP3Fr
* offset and length. Strings are ISO-8859-1
*/
protected static String getString(byte[] data, int offset, int length) {
- try {
- return new String(data, offset, length, "ISO-8859-1");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(
- "Core encoding ISO-8859-1 encoding is not available", e);
- }
+ return new String(data, offset, length, ISO_8859_1);
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java Thu Aug 20 09:51:44 2015
@@ -20,10 +20,12 @@ import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* This is used to parse Lyrics3 tag information
* from an MP3 file, if available.
@@ -83,12 +85,12 @@ public class LyricsHandler {
// size including the LYRICSBEGIN but excluding the
// length+LYRICS200 at the end.
int length = Integer.parseInt(
- new String(tagData, lookat-6, 6, IOUtils.UTF_8)
+ new String(tagData, lookat-6, 6, UTF_8)
);
String lyrics = new String(
tagData, lookat-length+5, length-11,
- "ASCII"
+ US_ASCII
);
// Tags are a 3 letter code, 5 digit length, then data
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java Thu Aug 20 09:51:44 2015
@@ -40,9 +40,9 @@ import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -60,6 +60,8 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* TesseractOCRParser powered by tesseract-ocr engine. To enable this parser,
* create a {@link TesseractOCRConfig} object and pass it through a
@@ -288,7 +290,7 @@ public class TesseractOCRParser extends
*/
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
- Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
+ Reader reader = new InputStreamReader(stream, UTF_8);
xhtml.startDocument();
xhtml.startElement("div");
try {
@@ -312,7 +314,7 @@ public class TesseractOCRParser extends
private void logStream(final String logType, final InputStream stream, final File file) {
new Thread() {
public void run() {
- Reader reader = new InputStreamReader(stream, IOUtils.UTF_8);
+ Reader reader = new InputStreamReader(stream, UTF_8);
StringBuilder out = new StringBuilder();
char[] buffer = new char[1024];
try {
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java Thu Aug 20 09:51:44 2015
@@ -31,8 +31,8 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java Thu Aug 20 09:51:44 2015
@@ -27,8 +27,8 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@@ -41,6 +41,8 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* OpenOffice parser
*/
@@ -180,7 +182,7 @@ public class OpenDocumentParser extends
if (entry == null) return;
if (entry.getName().equals("mimetype")) {
- String type = IOUtils.toString(zip, IOUtils.UTF_8.name());
+ String type = IOUtils.toString(zip, UTF_8);
metadata.set(Metadata.CONTENT_TYPE, type);
} else if (entry.getName().equals(META_NAME)) {
meta.parse(zip, new DefaultHandler(), metadata, context);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Thu Aug 20 09:51:44 2015
@@ -29,6 +29,8 @@ import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
@@ -63,8 +65,6 @@ import org.apache.pdfbox.util.TextPositi
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.IOExceptionWithCause;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFEncodedStringDecoder.java Thu Aug 20 09:51:44 2015
@@ -24,6 +24,8 @@ import java.io.InputStream;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.BaseParser;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
/**
* In fairly rare cases, a PDF's XMP will contain a string that
* has incorrectly been encoded with PDFEncoding: an octal for non-ascii and
@@ -77,7 +79,7 @@ class PDFEncodedStringDecoder {
*/
String decode(String value) {
try {
- byte[] bytes = new String("(" + value + ")").getBytes("ISO-8859-1");
+ byte[] bytes = new String("(" + value + ")").getBytes(ISO_8859_1);
InputStream is = new ByteArrayInputStream(bytes);
COSStringParser p = new COSStringParser(is);
String parsed = p.myParseCOSString();
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Thu Aug 20 09:51:44 2015
@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Set;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.jempbox.xmp.XMPSchema;
import org.apache.jempbox.xmp.XMPSchemaDublinCore;
import org.apache.jempbox.xmp.pdfa.XMPSchemaPDFAId;
@@ -44,7 +45,6 @@ import org.apache.pdfbox.pdmodel.font.PD
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.AccessPermissions;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java Thu Aug 20 09:51:44 2015
@@ -32,10 +32,10 @@ import org.apache.commons.compress.compr
import org.apache.commons.compress.compressors.gzip.GzipUtils;
import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Thu Aug 20 09:51:44 2015
@@ -38,11 +38,11 @@ import org.apache.commons.compress.archi
import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException.Feature;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Aug 20 09:51:44 2015
@@ -35,6 +35,7 @@ import org.apache.commons.compress.archi
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.commons.io.IOUtils;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
@@ -43,7 +44,6 @@ import org.apache.poi.openxml4j.opc.Pack
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -51,6 +51,8 @@ import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.iwork.IWorkPackageParser;
import org.apache.tika.parser.iwork.IWorkPackageParser.IWORKDocumentType;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* A detector that works on Zip documents and other archive and compression
* formats to figure out exactly what the file is.
@@ -188,7 +190,7 @@ public class ZipContainerDetector implem
if (mimetype != null) {
InputStream stream = zip.getInputStream(mimetype);
try {
- return MediaType.parse(IOUtils.toString(stream, IOUtils.UTF_8.name()));
+ return MediaType.parse(IOUtils.toString(stream, UTF_8));
} finally {
stream.close();
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java Thu Aug 20 09:51:44 2015
@@ -34,6 +34,8 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import static java.nio.charset.StandardCharsets.US_ASCII;
+
/**
* A basic text extracting parser for the CADKey PRT (CAD Drawing)
* format. It outputs text from note entries.
@@ -80,7 +82,7 @@ public class PRTParser extends AbstractP
byte[] date = new byte[12];
IOUtils.readFully(stream, date);
- String dateStr = new String(date, "ASCII");
+ String dateStr = new String(date, US_ASCII);
if(dateStr.startsWith("19") || dateStr.startsWith("20")) {
String formattedDate = dateStr.substring(0, 4) + "-" + dateStr.substring(4,6) +
"-" + dateStr.substring(6,8) + "T" + dateStr.substring(8,10) + ":" +
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java?rev=1696745&r1=1696744&r2=1696745&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java Thu Aug 20 09:51:44 2015
@@ -21,12 +21,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.io.FilenameUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.FilenameUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.RTFMetadata;