You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ol...@apache.org on 2011/06/08 23:00:28 UTC
svn commit: r1133554 [3/5] - in
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm: ./
accessor/ assertion/ core/ exception/ lzx/
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java Wed Jun 8 21:00:27 2011
@@ -31,126 +31,126 @@ import org.apache.tika.parser.chm.core.C
*
*/
public class DirectoryListingEntry {
- /* Length of the entry name */
- private int name_length;
- /* Entry name or directory name */
- private String name;
- /* Entry type */
- private ChmCommons.EntryType entryType;
- /* Entry offset */
- private int offset;
- /* Entry size */
- private int length;
-
- public DirectoryListingEntry() {
-
- }
-
- /**
- * Constructs directoryListingEntry
- *
- * @param name_length
- * int
- * @param name
- * String
- * @param isCompressed
- * ChmCommons.EntryType
- * @param offset
- * int
- * @param length
- * int
- */
- public DirectoryListingEntry(int name_length, String name,
- ChmCommons.EntryType isCompressed, int offset, int length) {
- ChmAssert.assertDirectoryListingEntry(name_length, name, isCompressed,
- offset, length);
- setNameLength(name_length);
- setName(name);
- setEntryType(isCompressed);
- setOffset(offset);
- setLength(length);
- }
-
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("name_length:=" + getNameLength()
- + System.getProperty("line.separator"));
- sb.append("name:=" + getName() + System.getProperty("line.separator"));
- sb.append("entryType:=" + getEntryType()
- + System.getProperty("line.separator"));
- sb.append("offset:=" + getOffset()
- + System.getProperty("line.separator"));
- sb.append("length:=" + getLength());
- return sb.toString();
- }
-
- /**
- * Returns an entry name length
- *
- * @return int
- */
- public int getNameLength() {
- return name_length;
- }
-
- /**
- * Sets an entry name length
- *
- * @param name_length
- * int
- */
- protected void setNameLength(int name_length) {
- this.name_length = name_length;
- }
-
- /**
- * Returns an entry name
- *
- * @return String
- */
- public String getName() {
- return name;
- }
-
- /**
- * Sets entry name
- *
- * @param name
- * String
- */
- protected void setName(String name) {
- this.name = name;
- }
-
- /**
- * Returns ChmCommons.EntryType (COMPRESSED or UNCOMPRESSED)
- *
- * @return ChmCommons.EntryType
- */
- public ChmCommons.EntryType getEntryType() {
- return entryType;
- }
-
- protected void setEntryType(ChmCommons.EntryType entryType) {
- this.entryType = entryType;
- }
-
- public int getOffset() {
- return offset;
- }
-
- protected void setOffset(int offset) {
- this.offset = offset;
- }
-
- public int getLength() {
- return length;
- }
-
- protected void setLength(int length) {
- this.length = length;
- }
+ /* Length of the entry name */
+ private int name_length;
+ /* Entry name or directory name */
+ private String name;
+ /* Entry type */
+ private ChmCommons.EntryType entryType;
+ /* Entry offset */
+ private int offset;
+ /* Entry size */
+ private int length;
+
+ public DirectoryListingEntry() {
+
+ }
+
+ /**
+ * Constructs directoryListingEntry
+ *
+ * @param name_length
+ * int
+ * @param name
+ * String
+ * @param isCompressed
+ * ChmCommons.EntryType
+ * @param offset
+ * int
+ * @param length
+ * int
+ */
+ public DirectoryListingEntry(int name_length, String name,
+ ChmCommons.EntryType isCompressed, int offset, int length) {
+ ChmAssert.assertDirectoryListingEntry(name_length, name, isCompressed,
+ offset, length);
+ setNameLength(name_length);
+ setName(name);
+ setEntryType(isCompressed);
+ setOffset(offset);
+ setLength(length);
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("name_length:=" + getNameLength()
+ + System.getProperty("line.separator"));
+ sb.append("name:=" + getName() + System.getProperty("line.separator"));
+ sb.append("entryType:=" + getEntryType()
+ + System.getProperty("line.separator"));
+ sb.append("offset:=" + getOffset()
+ + System.getProperty("line.separator"));
+ sb.append("length:=" + getLength());
+ return sb.toString();
+ }
+
+ /**
+ * Returns an entry name length
+ *
+ * @return int
+ */
+ public int getNameLength() {
+ return name_length;
+ }
+
+ /**
+ * Sets an entry name length
+ *
+ * @param name_length
+ * int
+ */
+ protected void setNameLength(int name_length) {
+ this.name_length = name_length;
+ }
+
+ /**
+ * Returns an entry name
+ *
+ * @return String
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Sets entry name
+ *
+ * @param name
+ * String
+ */
+ protected void setName(String name) {
+ this.name = name;
+ }
+
+ /**
+ * Returns ChmCommons.EntryType (COMPRESSED or UNCOMPRESSED)
+ *
+ * @return ChmCommons.EntryType
+ */
+ public ChmCommons.EntryType getEntryType() {
+ return entryType;
+ }
+
+ protected void setEntryType(ChmCommons.EntryType entryType) {
+ this.entryType = entryType;
+ }
+
+ public int getOffset() {
+ return offset;
+ }
+
+ protected void setOffset(int offset) {
+ this.offset = offset;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ protected void setLength(int length) {
+ this.length = length;
+ }
- public static void main(String[] args) {
- }
+ public static void main(String[] args) {
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java Wed Jun 8 21:00:27 2011
@@ -27,141 +27,133 @@ import org.apache.tika.parser.chm.except
* Contains chm extractor assertions
*/
public class ChmAssert {
- /**
- * Checks a validity of the chmBlockSegment parameters
- *
- * @param data
- * byte[]
- * @param resetTable
- * ChmLzxcResetTable
- * @param blockNumber
- * int
- * @param lzxcBlockOffset
- * int
- * @param lzxcBlockLength
- * int
- */
- public static final void assertChmBlockSegment(byte[] data,
- ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset,
- int lzxcBlockLength) {
- if ((data == null))
- throw new ChmParsingException("data[] is null");
-
- if ((data.length <= 0))
- throw new ChmParsingException(
- "data[] length should be greater than zero");
-
- if (resetTable == null)
- throw new ChmParsingException("resetTable is null");
-
- if (resetTable.getBlockAddress().length <= 1)
- throw new ChmParsingException(
- "resetTable.getBlockAddress().length should be greater than zero");
-
- if (blockNumber < 0)
- throw new ChmParsingException(
- "blockNumber should be positive number");
-
- if (lzxcBlockOffset < 0)
- throw new ChmParsingException(
- "lzxcBlockOffset should be positive number");
-
- if (lzxcBlockLength < 0)
- throw new ChmParsingException(
- "lzxcBlockLength should be positive number");
- }
-
- /**
- * Checks if InputStream is not null
- *
- * @param is
- * InputStream
- */
- public static final void assertInputStreamNotNull(InputStream is) {
- if (is == null)
- throw new ChmParsingException("input sream is null");
- }
-
- /**
- * Checks validity of ChmAccessor parameters
- *
- * @param data
- * @param chmItsfHeader
- * @param count
- */
- public static final void assertChmAccessorParameters(byte[] data,
- ChmAccessor<?> chmAccessor, int count) {
- assertByteArrayNotNull(data);
- assertChmAccessorNotNull(chmAccessor);
- }
-
- /**
- * Checks if byte[] is not null
- *
- * @param data
- */
- public static final void assertByteArrayNotNull(byte[] data) {
- if (data == null)
- throw new ChmParsingException("byte[] data is null");
- }
-
- /**
- * Checks if ChmAccessor is not null In case of null throws exception
- *
- * @param ChmAccessor
- */
- public static final void assertChmAccessorNotNull(ChmAccessor<?> chmAccessor) {
- if (chmAccessor == null)
- throw new ChmParsingException("chm header is null");
- }
-
- /**
- * Checks validity of the DirectoryListingEntry's parameters In case of
- * invalid parameter(s) throws an exception
- *
- * @param name_length
- * length of the chm entry name
- * @param name
- * chm entry name
- * @param entryType
- * EntryType
- * @param offset
- * @param length
- */
- public static final void assertDirectoryListingEntry(int name_length,
- String name, ChmCommons.EntryType entryType, int offset, int length) {
- if (name_length < 0)
- throw new ChmParsingException("invalid name length");
- if (name == null)
- throw new ChmParsingException("invalid name");
-
- if ((entryType != ChmCommons.EntryType.COMPRESSED)
- && (entryType != ChmCommons.EntryType.UNCOMPRESSED))
- throw new ChmParsingException(
- "invalid compressed type, should be EntryType.COMPRESSED | EntryType.UNCOMPRESSED");
-
- if (offset < 0)
- throw new ChmParsingException("invalid offset");
-
- if (length < 0)
- throw new ChmParsingException("invalid length");
- }
-
- public static void assertCopyingDataIndex(int index, int dataLength) {
- if (index >= dataLength)
- throw new ChmParsingException(
- "cannot parse chm file index > data.length");
- }
-
- /**
- * Checks if int param is greater than zero In case param <=0 throws an
- * exception
- *
- * @param param
- */
- public static void assertPositiveInt(int param) {
- if (param <= 0)
- throw new ChmParsingException(
- "resetTable.getBlockAddress().length should be greater than zero");
- }
+ /**
+ * Checks a validity of the chmBlockSegment parameters
+ *
+ * @param data
+ * byte[]
+ * @param resetTable
+ * ChmLzxcResetTable
+ * @param blockNumber
+ * int
+ * @param lzxcBlockOffset
+ * int
+ * @param lzxcBlockLength
+ * int
+ */
+ public static final void assertChmBlockSegment(byte[] data,
+ ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset,
+ int lzxcBlockLength) {
+ if ((data == null))
+ throw new ChmParsingException("data[] is null");
+
+ if ((data.length <= 0))
+ throw new ChmParsingException("data[] length should be greater than zero");
+
+ if (resetTable == null)
+ throw new ChmParsingException("resetTable is null");
+
+ if (resetTable.getBlockAddress().length <= 1)
+ throw new ChmParsingException("resetTable.getBlockAddress().length should be greater than zero");
+
+ if (blockNumber < 0)
+ throw new ChmParsingException("blockNumber should be positive number");
+
+ if (lzxcBlockOffset < 0)
+ throw new ChmParsingException("lzxcBlockOffset should be positive number");
+
+ if (lzxcBlockLength < 0)
+ throw new ChmParsingException("lzxcBlockLength should be positive number");
+ }
+
+ /**
+ * Checks if InputStream is not null
+ *
+ * @param is
+ * InputStream
+ */
+ public static final void assertInputStreamNotNull(InputStream is) {
+ if (is == null)
+ throw new ChmParsingException("input sream is null");
+ }
+
+ /**
+ * Checks validity of ChmAccessor parameters
+ *
+ * @param data
+ * @param chmItsfHeader
+ * @param count
+ */
+ public static final void assertChmAccessorParameters(byte[] data,
+ ChmAccessor<?> chmAccessor, int count) {
+ assertByteArrayNotNull(data);
+ assertChmAccessorNotNull(chmAccessor);
+ }
+
+ /**
+ * Checks if byte[] is not null
+ *
+ * @param data
+ */
+ public static final void assertByteArrayNotNull(byte[] data) {
+ if (data == null)
+ throw new ChmParsingException("byte[] data is null");
+ }
+
+ /**
+ * Checks if ChmAccessor is not null In case of null throws exception
+ *
+ * @param ChmAccessor
+ */
+ public static final void assertChmAccessorNotNull(ChmAccessor<?> chmAccessor) {
+ if (chmAccessor == null)
+ throw new ChmParsingException("chm header is null");
+ }
+
+ /**
+ * Checks validity of the DirectoryListingEntry's parameters In case of
+ * invalid parameter(s) throws an exception
+ *
+ * @param name_length
+ * length of the chm entry name
+ * @param name
+ * chm entry name
+ * @param entryType
+ * EntryType
+ * @param offset
+ * @param length
+ */
+ public static final void assertDirectoryListingEntry(int name_length,
+ String name, ChmCommons.EntryType entryType, int offset, int length) {
+ if (name_length < 0)
+ throw new ChmParsingException("invalid name length");
+ if (name == null)
+ throw new ChmParsingException("invalid name");
+
+ if ((entryType != ChmCommons.EntryType.COMPRESSED)
+ && (entryType != ChmCommons.EntryType.UNCOMPRESSED))
+ throw new ChmParsingException("invalid compressed type, should be EntryType.COMPRESSED | EntryType.UNCOMPRESSED");
+
+ if (offset < 0)
+ throw new ChmParsingException("invalid offset");
+
+ if (length < 0)
+ throw new ChmParsingException("invalid length");
+ }
+
+ public static void assertCopyingDataIndex(int index, int dataLength) {
+ if (index >= dataLength)
+ throw new ChmParsingException("cannot parse chm file index > data.length");
+ }
+
+ /**
+ * Checks if int param is greater than zero In case param <=0 throws an
+ * exception
+ *
+ * @param param
+ */
+ public static void assertPositiveInt(int param) {
+ if (param <= 0)
+ throw new ChmParsingException("resetTable.getBlockAddress().length should be greater than zero");
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java Wed Jun 8 21:00:27 2011
@@ -21,7 +21,6 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
@@ -31,344 +30,371 @@ import org.apache.tika.parser.chm.assert
import org.apache.tika.parser.chm.exception.ChmParsingException;
public class ChmCommons {
- /* Prevents initialization */
- private ChmCommons() {
- }
-
- public static void assertByteArrayNotNull(byte[] data) {
- if (data == null)
- throw new ChmParsingException("byte[] is null");
- }
-
- /**
- * Represents entry types: uncompressed, compressed
- */
- public enum EntryType {
- UNCOMPRESSED, COMPRESSED
- }
-
- /**
- * Represents lzx states: started decoding, not started decoding
- */
- public enum LzxState {
- STARTED_DECODING, NOT_STARTED_DECODING
- }
-
- /**
- * Represents intel file states during decompression
- */
- public enum IntelState {
- STARTED, NOT_STARTED
- }
-
- /**
- * Represents lzx block types in order to decompress differently
- */
- public final static int UNDEFINED = 0;
- public final static int VERBATIM = 1;
- public final static int ALIGNED_OFFSET = 2;
- public final static int UNCOMPRESSED = 3;
-
- /**
- * LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) Returns X,
- * i.e 2^X
- *
- * @param window
- * chmLzxControlData.getWindowSize()
- *
- * @return window size
- */
- public static int getWindowSize(int window) {
- int win = 0;
- while (window > 1) {
- window >>>= 1;
- win++;
- }
- return win;
- }
-
- public static byte[] getChmBlockSegment(byte[] data,
- ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset,
- int lzxcBlockLength) {
- ChmAssert.assertChmBlockSegment(data, resetTable, blockNumber,
- lzxcBlockOffset, lzxcBlockLength);
- int blockLength = -1;
- // TODO add int_max_value checking
- if (blockNumber < (resetTable.getBlockAddress().length - 1)) {
- blockLength = (int) (resetTable.getBlockAddress()[blockNumber + 1] - resetTable
- .getBlockAddress()[blockNumber]);
- } else {
- /* new code */
- if (blockNumber >= resetTable.getBlockAddress().length)
- blockLength = 0;
- else
- /* end new code */
- blockLength = (int) (lzxcBlockLength - resetTable
- .getBlockAddress()[blockNumber]);
- }
- byte[] t = Arrays
- .copyOfRange(
- data,
- (int) (lzxcBlockOffset + resetTable.getBlockAddress()[blockNumber]),
- (int) (lzxcBlockOffset
- + resetTable.getBlockAddress()[blockNumber] + blockLength));
- return (t != null) ? t : new byte[1];
- }
-
- /**
- * Returns textual representation of LangID
- *
- * @param langID
- *
- * @return language name
- */
- public static String getLanguage(long langID) {
- /* Potential problem with casting */
- switch ((int) langID) {
- case 1025:
- return "Arabic";
- case 1069:
- return "Basque";
- case 1027:
- return "Catalan";
- case 2052:
- return "Chinese (Simplified)";
- case 1028:
- return "Chinese (Traditional)";
- case 1029:
- return "Czech";
- case 1030:
- return "Danish";
- case 1043:
- return "Dutch";
- case 1033:
- return "English (United States)";
- case 1035:
- return "Finnish";
- case 1036:
- return "French";
- case 1031:
- return "German";
- case 1032:
- return "Greek";
- case 1037:
- return "Hebrew";
- case 1038:
- return "Hungarian";
- case 1040:
- return "Italian";
- case 1041:
- return "Japanese";
- case 1042:
- return "Korean";
- case 1044:
- return "Norwegian";
- case 1045:
- return "Polish";
- case 2070:
- return "Portuguese";
- case 1046:
- return "Portuguese (Brazil)";
- case 1049:
- return "Russian";
- case 1051:
- return "Slovakian";
- case 1060:
- return "Slovenian";
- case 3082:
- return "Spanish";
- case 1053:
- return "Swedish";
- case 1055:
- return "Turkish";
- default:
- return "unknown - http://msdn.microsoft.com/en-us/library/bb165625%28VS.80%29.aspx";
- }
- }
-
- /**
- * Checks skippable patterns
- *
- * @param directoryListingEntry
- *
- * @return boolean
- */
- public static boolean hasSkip(DirectoryListingEntry directoryListingEntry) {
- return (directoryListingEntry.getName().startsWith("/$")
- || directoryListingEntry.getName().startsWith("/#") || directoryListingEntry
- .getName().startsWith("::")) ? true : false;
- }
-
- /**
- * Writes byte[][] to the file
- *
- * @param buffer
- * @param fileToBeSaved
- * file name
- */
- public static void writeFile(byte[][] buffer, String fileToBeSaved) {
- FileOutputStream output = null;
- if (buffer != null && fileToBeSaved != null && !fileToBeSaved.isEmpty()) {
- try {
- output = new FileOutputStream(fileToBeSaved);
- if (output != null)
- for (int i = 0; i < buffer.length; i++) {
- output.write(buffer[i]);
- }
- } catch (FileNotFoundException e) {
- System.err.println("The " + fileToBeSaved
- + " does not seem correct");
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if (output != null)
- try {
- output.flush();
- output.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
-
- /**
- * Reverses the order of given array
- *
- * @param array
- */
- public static void reverse(byte[] array) {
- if (array == null) {
- return;
- }
- int i = 0;
- int j = array.length - 1;
- byte tmp;
- while (j > i) {
- tmp = array[j];
- array[j] = array[i];
- array[i] = tmp;
- j--;
- i++;
- }
- }
-
- /**
- * Returns byte array Closes the InputStream
- *
- * @param is
- * InputStream of chm file
- *
- * @return byte array
- *
- * @throws IOException
- */
- public static byte[] toByteArray(InputStream is) throws IOException {
- if (is != null) {
- ByteArrayOutputStream buffer = new ByteArrayOutputStream();
- int nRead;
- byte[] data = new byte[16384];
- while ((nRead = is.read(data, 0, data.length)) != -1) {
- buffer.write(data, 0, nRead);
- }
- buffer.flush();
- try {
- is.close();
- buffer.close();
- } catch (Exception e) {
- System.err.println(e.getMessage());
- }
- return buffer.toByteArray();
- } else
- throw new ChmParsingException("InputStream is null");
- }
-
- /**
- * Returns an index of the reset table
- *
- * @param text
- * @param pattern
- * @return index of the reset table
- */
- public static final int indexOfResetTableBlock(byte[] text, byte[] pattern) {
- return (indexOf(text, pattern)) - 4;
- }
-
- /**
- * Searches some pattern in byte[]
- *
- * @param text
- * byte[]
- * @param pattern
- * byte[]
- * @return an index, if nothing found returns -1
- */
- public static int indexOf(byte[] text, byte[] pattern) {
- int[] next = null;
- int i = 0, j = -1;
-
- /* Preprocessing */
- if (pattern != null && text != null) {
- next = new int[pattern.length];
- next[0] = -1;
- } else
- throw new ChmParsingException(
- "pattern and/or text should not be null");
-
- /* Computes a failure function */
- while (i < pattern.length - 1) {
- if (j == -1 || pattern[i] == pattern[j]) {
- i++;
- j++;
- if (pattern[i] != pattern[j])
- next[i] = j;
- else
- next[i] = next[j];
- } else
- j = next[j];
- }
-
- /* Reinitializes local variables */
- i = j = 0;
-
- /* Matching */
- while (i < text.length && j < pattern.length) {
- if (j == -1 || pattern[j] == text[i]) {
- i++;
- j++;
- } else
- j = next[j];
- }
- if (j == pattern.length)
- return (i - j); // match found at offset i - M
- else
- return -1; // not found
- }
-
- /**
- * Searches for some pattern in the directory listing entry list
- *
- * @param list
- * @param pattern
- * @return an index, if nothing found returns -1
- */
- public static int indexOf(List<DirectoryListingEntry> list, String pattern) {
- int place = 0;
- for (Iterator<DirectoryListingEntry> iterator = list.iterator(); iterator
- .hasNext();) {
- DirectoryListingEntry directoryListingEntry = iterator.next();
- if (directoryListingEntry.toString().contains(pattern)) {
- return place;
- } else
- ++place;
- }
- return -1;// not found
- }
-
- /**
- * @param args
- */
- public static void main(String[] args) {
- }
+ /* Prevents initialization */
+ private ChmCommons() {
+ }
+
+ public static void assertByteArrayNotNull(byte[] data) {
+ if (data == null)
+ throw new ChmParsingException("byte[] is null");
+ }
+
+ /**
+ * Represents entry types: uncompressed, compressed
+ */
+ public enum EntryType {
+ UNCOMPRESSED, COMPRESSED
+ }
+
+ /**
+ * Represents lzx states: started decoding, not started decoding
+ */
+ public enum LzxState {
+ STARTED_DECODING, NOT_STARTED_DECODING
+ }
+
+ /**
+ * Represents intel file states during decompression
+ */
+ public enum IntelState {
+ STARTED, NOT_STARTED
+ }
+
+ /**
+ * Represents lzx block types in order to decompress differently
+ */
+ public final static int UNDEFINED = 0;
+ public final static int VERBATIM = 1;
+ public final static int ALIGNED_OFFSET = 2;
+ public final static int UNCOMPRESSED = 3;
+
+ /**
+ * LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) Returns X,
+ * i.e 2^X
+ *
+ * @param window
+ * chmLzxControlData.getWindowSize()
+ *
+ * @return window size
+ */
+ public static int getWindowSize(int window) {
+ int win = 0;
+ while (window > 1) {
+ window >>>= 1;
+ win++;
+ }
+ return win;
+ }
+
+ public static byte[] getChmBlockSegment(byte[] data,
+ ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset,
+ int lzxcBlockLength) {
+ ChmAssert.assertChmBlockSegment(data, resetTable, blockNumber, lzxcBlockOffset, lzxcBlockLength);
+ int blockLength = -1;
+ // TODO add int_max_value checking
+ if (blockNumber < (resetTable.getBlockAddress().length - 1)) {
+ blockLength = (int) (resetTable.getBlockAddress()[blockNumber + 1] - resetTable
+ .getBlockAddress()[blockNumber]);
+ } else {
+ /* new code */
+ if (blockNumber >= resetTable.getBlockAddress().length)
+ blockLength = 0;
+ else
+ /* end new code */
+ blockLength = (int) (lzxcBlockLength - resetTable
+ .getBlockAddress()[blockNumber]);
+ }
+ byte[] t = ChmCommons
+ .copyOfRange(
+ data,
+ (int) (lzxcBlockOffset + resetTable.getBlockAddress()[blockNumber]),
+ (int) (lzxcBlockOffset
+ + resetTable.getBlockAddress()[blockNumber] + blockLength));
+ return (t != null) ? t : new byte[1];
+ }
+
+ /**
+ * Returns textual representation of LangID
+ *
+ * @param langID
+ *
+ * @return language name
+ */
+ public static String getLanguage(long langID) {
+ /* Potential problem with casting */
+ switch ((int) langID) {
+ case 1025:
+ return "Arabic";
+ case 1069:
+ return "Basque";
+ case 1027:
+ return "Catalan";
+ case 2052:
+ return "Chinese (Simplified)";
+ case 1028:
+ return "Chinese (Traditional)";
+ case 1029:
+ return "Czech";
+ case 1030:
+ return "Danish";
+ case 1043:
+ return "Dutch";
+ case 1033:
+ return "English (United States)";
+ case 1035:
+ return "Finnish";
+ case 1036:
+ return "French";
+ case 1031:
+ return "German";
+ case 1032:
+ return "Greek";
+ case 1037:
+ return "Hebrew";
+ case 1038:
+ return "Hungarian";
+ case 1040:
+ return "Italian";
+ case 1041:
+ return "Japanese";
+ case 1042:
+ return "Korean";
+ case 1044:
+ return "Norwegian";
+ case 1045:
+ return "Polish";
+ case 2070:
+ return "Portuguese";
+ case 1046:
+ return "Portuguese (Brazil)";
+ case 1049:
+ return "Russian";
+ case 1051:
+ return "Slovakian";
+ case 1060:
+ return "Slovenian";
+ case 3082:
+ return "Spanish";
+ case 1053:
+ return "Swedish";
+ case 1055:
+ return "Turkish";
+ default:
+ return "unknown - http://msdn.microsoft.com/en-us/library/bb165625%28VS.80%29.aspx";
+ }
+ }
+
+ /**
+ * Checks skippable patterns
+ *
+ * @param directoryListingEntry
+ *
+ * @return boolean
+ */
+ public static boolean hasSkip(DirectoryListingEntry directoryListingEntry) {
+ return (directoryListingEntry.getName().startsWith("/$")
+ || directoryListingEntry.getName().startsWith("/#") || directoryListingEntry
+ .getName().startsWith("::")) ? true : false;
+ }
+
+ /**
+ * Writes byte[][] to the file
+ *
+ * @param buffer
+ * @param fileToBeSaved
+ * file name
+ */
+ public static void writeFile(byte[][] buffer, String fileToBeSaved) {
+ FileOutputStream output = null;
+ if (buffer != null && fileToBeSaved != null
+ && !ChmCommons.isEmpty(fileToBeSaved)) {
+ try {
+ output = new FileOutputStream(fileToBeSaved);
+ if (output != null)
+ for (int i = 0; i < buffer.length; i++) {
+ output.write(buffer[i]);
+ }
+ } catch (FileNotFoundException e) {
+ System.err.println("The " + fileToBeSaved
+ + " does not seem correct");
+ } catch (IOException e) {
+ e.printStackTrace();
+ } finally {
+ if (output != null)
+ try {
+ output.flush();
+ output.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+
+ /**
+ * Reverses the order of given array
+ *
+ * @param array
+ */
+ public static void reverse(byte[] array) {
+ if (array == null) {
+ return;
+ }
+ int i = 0;
+ int j = array.length - 1;
+ byte tmp;
+ while (j > i) {
+ tmp = array[j];
+ array[j] = array[i];
+ array[i] = tmp;
+ j--;
+ i++;
+ }
+ }
+
+ /**
+ * Returns byte array Closes the InputStream
+ *
+ * @param is
+ * InputStream of chm file
+ *
+ * @return byte array
+ *
+ * @throws IOException
+ */
+ public static byte[] toByteArray(InputStream is) throws IOException {
+ if (is != null) {
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ int nRead;
+ byte[] data = new byte[16384];
+ while ((nRead = is.read(data, 0, data.length)) != -1) {
+ buffer.write(data, 0, nRead);
+ }
+ buffer.flush();
+ try {
+ is.close();
+ buffer.close();
+ } catch (Exception e) {
+ System.err.println(e.getMessage());
+ }
+ return buffer.toByteArray();
+ } else
+ throw new ChmParsingException("InputStream is null");
+ }
+
+ /**
+ * Returns an index of the reset table
+ *
+ * @param text
+ * @param pattern
+ * @return index of the reset table
+ */
+ public static final int indexOfResetTableBlock(byte[] text, byte[] pattern) {
+ return (indexOf(text, pattern)) - 4;
+ }
+
+ /**
+ * Searches some pattern in byte[]
+ *
+ * @param text
+ * byte[]
+ * @param pattern
+ * byte[]
+ * @return an index, if nothing found returns -1
+ */
+ public static int indexOf(byte[] text, byte[] pattern) {
+ int[] next = null;
+ int i = 0, j = -1;
+
+ /* Preprocessing */
+ if (pattern != null && text != null) {
+ next = new int[pattern.length];
+ next[0] = -1;
+ } else
+ throw new ChmParsingException("pattern and/or text should not be null");
+
+ /* Computes a failure function */
+ while (i < pattern.length - 1) {
+ if (j == -1 || pattern[i] == pattern[j]) {
+ i++;
+ j++;
+ if (pattern[i] != pattern[j])
+ next[i] = j;
+ else
+ next[i] = next[j];
+ } else
+ j = next[j];
+ }
+
+ /* Reinitializes local variables */
+ i = j = 0;
+
+ /* Matching */
+ while (i < text.length && j < pattern.length) {
+ if (j == -1 || pattern[j] == text[i]) {
+ i++;
+ j++;
+ } else
+ j = next[j];
+ }
+ if (j == pattern.length)
+ return (i - j); // match found at offset i - M
+ else
+ return -1; // not found
+ }
+
+ /**
+ * Searches for some pattern in the directory listing entry list
+ *
+ * @param list
+ * @param pattern
+ * @return an index, if nothing found returns -1
+ */
+ public static int indexOf(List<DirectoryListingEntry> list, String pattern) {
+ int place = 0;
+ for (Iterator<DirectoryListingEntry> iterator = list.iterator(); iterator.hasNext();) {
+ DirectoryListingEntry directoryListingEntry = iterator.next();
+ if (directoryListingEntry.toString().contains(pattern)) {
+ return place;
+ } else
+ ++place;
+ }
+ return -1;// not found
+ }
+
+ /*
+ * This method is added because of supporting of Java 5
+ */
+ public static byte[] copyOfRange(byte[] original, int from, int to) {
+ checkCopyOfRangeParams(original, from, to);
+ int newLength = to - from;
+ if (newLength < 0)
+ throw new IllegalArgumentException(from + " > " + to);
+ byte[] copy = new byte[newLength];
+ System.arraycopy(original, from, copy, 0, Math.min(original.length - from, newLength));
+ return copy;
+ }
+
+ private static void checkCopyOfRangeParams(byte[] original, int from, int to) {
+ if (original == null)
+ throw new NullPointerException("array is null");
+ if (from < 0)
+ throw new IllegalArgumentException(from + " should be > 0");
+ if (to < 0)
+ throw new IllegalArgumentException(to + " should be > 0");
+ }
+
+ /*
+ * This method is added because of supporting of Java 5
+ */
+ public static boolean isEmpty(String str) {
+ return str == null || str.length() == 0;
+ }
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java Wed Jun 8 21:00:27 2011
@@ -17,84 +17,84 @@
package org.apache.tika.parser.chm.core;
public class ChmConstants {
- /* Prevents instantiation */
- private ChmConstants() {
- }
-
- public static final String DEFAULT_CHARSET = "UTF-8";
- public static final String ITSF = "ITSF";
- public static final String ITSP = "ITSP";
- public static final String PMGL = "PMGL";
- public static final String LZXC = "LZXC";
- public static final String CHM_PMGI_MARKER = "PMGI";
- public static final int BYTE_ARRAY_LENGHT = 16;
- public static final int CHM_ITSF_V2_LEN = 0x58;
- public static final int CHM_ITSF_V3_LEN = 0x60;
- public static final int CHM_ITSP_V1_LEN = 0x54;
- public static final int CHM_PMGL_LEN = 0x14;
- public static final int CHM_PMGI_LEN = 0x08;
- public static final int CHM_LZXC_RESETTABLE_V1_LEN = 0x28;
- public static final int CHM_LZXC_MIN_LEN = 0x18;
- public static final int CHM_LZXC_V2_LEN = 0x1c;
- public static final int CHM_SIGNATURE_LEN = 4;
- public static final int CHM_VER_2 = 2;
- public static final int CHM_VER_3 = 3;
- public static final int CHM_VER_1 = 1;
- public static final int CHM_WINDOW_SIZE_BLOCK = 0x8000;
-
- /* my hacking */
- public static final int START_PMGL = 0xCC;
- public static final String CONTROL_DATA = "ControlData";
- public static final String RESET_TABLE = "ResetTable";
- public static final String CONTENT = "Content";
-
- /* some constants defined by the LZX specification */
- public static final int LZX_MIN_MATCH = 2;
- public static final int LZX_MAX_MATCH = 257;
- public static final int LZX_NUM_CHARS = 256;
- public static final int LZX_BLOCKTYPE_INVALID = 0; /*
- * also blocktypes 4-7
- * invalid
- */
- public static final int LZX_BLOCKTYPE_VERBATIM = 1;
- public static final int LZX_BLOCKTYPE_ALIGNED = 2;
- public static final int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
- public static final int LZX_PRETREE_NUM_ELEMENTS_BITS = 4; /* ??? */
- public static final int LZX_PRETREE_NUM_ELEMENTS = 20;
- public static final int LZX_ALIGNED_NUM_ELEMENTS = 8; /*
- * aligned offset tree
- * #elements
- */
- public static final int LZX_NUM_PRIMARY_LENGTHS = 7; /*
- * this one missing from
- * spec!
- */
- public static final int LZX_NUM_SECONDARY_LENGTHS = 249; /*
- * length tree
- * #elements
- */
-
- /* LZX huffman defines: tweak tablebits as desired */
- public static final int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
- public static final int LZX_PRETREE_TABLEBITS = 6;
- public static final int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
- public static final int LZX_MAIN_MAXSYMBOLS = LZX_NUM_CHARS * 2;
- public static final int LZX_MAINTREE_TABLEBITS = 12;
- public static final int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
- public static final int LZX_LENGTH_TABLEBITS = 12;
- public static final int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
- public static final int LZX_ALIGNED_TABLEBITS = 7;
- public static final int LZX_LENTABLE_SAFETY = 64;
-
- public static short[] EXTRA_BITS = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
- 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
- 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17 };
-
- public static int[] POSITION_BASE = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32,
- 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072,
- 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304,
- 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504,
- 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 1835008,
- 1966080, 2097152 };
+ /* Prevents instantiation: this class only holds constants */
+ private ChmConstants() {
+ }
+
+ public static final String DEFAULT_CHARSET = "UTF-8";
+ public static final String ITSF = "ITSF"; /* four-character marker strings, cf. CHM_SIGNATURE_LEN */
+ public static final String ITSP = "ITSP";
+ public static final String PMGL = "PMGL";
+ public static final String LZXC = "LZXC";
+ public static final String CHM_PMGI_MARKER = "PMGI";
+ public static final int BYTE_ARRAY_LENGHT = 16; /* NOTE(review): "LENGHT" misspells LENGTH; renaming would break users of this public constant */
+ public static final int CHM_ITSF_V2_LEN = 0x58; /* the *_LEN values are presumably structure sizes in bytes -- TODO confirm */
+ public static final int CHM_ITSF_V3_LEN = 0x60;
+ public static final int CHM_ITSP_V1_LEN = 0x54;
+ public static final int CHM_PMGL_LEN = 0x14;
+ public static final int CHM_PMGI_LEN = 0x08;
+ public static final int CHM_LZXC_RESETTABLE_V1_LEN = 0x28;
+ public static final int CHM_LZXC_MIN_LEN = 0x18;
+ public static final int CHM_LZXC_V2_LEN = 0x1c;
+ public static final int CHM_SIGNATURE_LEN = 4;
+ public static final int CHM_VER_2 = 2;
+ public static final int CHM_VER_3 = 3;
+ public static final int CHM_VER_1 = 1;
+ public static final int CHM_WINDOW_SIZE_BLOCK = 0x8000; /* 32768 */
+
+ /* Original author's own additions ("my hacking"); their origin is not documented here */
+ public static final int START_PMGL = 0xCC;
+ public static final String CONTROL_DATA = "ControlData";
+ public static final String RESET_TABLE = "ResetTable";
+ public static final String CONTENT = "Content";
+
+ /* some constants defined by the LZX specification */
+ public static final int LZX_MIN_MATCH = 2;
+ public static final int LZX_MAX_MATCH = 257;
+ public static final int LZX_NUM_CHARS = 256;
+ public static final int LZX_BLOCKTYPE_INVALID = 0; /* block types 4-7 are also invalid */
+ public static final int LZX_BLOCKTYPE_VERBATIM = 1;
+ public static final int LZX_BLOCKTYPE_ALIGNED = 2;
+ public static final int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
+ public static final int LZX_PRETREE_NUM_ELEMENTS_BITS = 4; /* ??? -- NOTE(review): purpose left undocumented by the original author */
+ public static final int LZX_PRETREE_NUM_ELEMENTS = 20;
+ public static final int LZX_ALIGNED_NUM_ELEMENTS = 8; /* aligned offset tree #elements */
+ public static final int LZX_NUM_PRIMARY_LENGTHS = 7; /* this one missing from spec! */
+ public static final int LZX_NUM_SECONDARY_LENGTHS = 249; /* length tree #elements */
+
+ /* LZX huffman defines: tweak tablebits as desired */
+ public static final int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS; /* 20 */
+ public static final int LZX_PRETREE_TABLEBITS = 6;
+ public static final int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8; /* 256 + 400 = 656 */
+ public static final int LZX_MAIN_MAXSYMBOLS = LZX_NUM_CHARS * 2; /* 512 */
+ public static final int LZX_MAINTREE_TABLEBITS = 12;
+ public static final int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1; /* 250 */
+ public static final int LZX_LENGTH_TABLEBITS = 12;
+ public static final int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS; /* 8 */
+ public static final int LZX_ALIGNED_TABLEBITS = 7;
+ public static final int LZX_LENTABLE_SAFETY = 64;
+
+ /*
+  * Table of 51 bit counts, paired index-for-index with POSITION_BASE: for
+  * every index i below the last,
+  * POSITION_BASE[i + 1] - POSITION_BASE[i] == 1 << EXTRA_BITS[i].
+  * The reference is final so the table cannot be swapped out; the elements
+  * of a Java array remain writable, so treat the contents as read-only.
+  */
+ public static final short[] EXTRA_BITS = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
+         5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
+         15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+         17, 17 };
+
+ /*
+  * Table of 51 base values, paired index-for-index with EXTRA_BITS: each
+  * value is its predecessor plus 1 << EXTRA_BITS[predecessor index],
+  * starting at 0 and ending at 2097152 (1 << 21).
+  * The reference is final so the table cannot be swapped out; the elements
+  * of a Java array remain writable, so treat the contents as read-only.
+  */
+ public static final int[] POSITION_BASE = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32,
+         48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072,
+         4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304,
+         131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504,
+         1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 1835008,
+         1966080, 2097152 };
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java Wed Jun 8 21:00:27 2011
@@ -19,7 +19,6 @@ package org.apache.tika.parser.chm.core;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
@@ -39,346 +38,360 @@ import org.apache.tika.parser.chm.lzx.Ch
* Extracts text from chm file. Enumerates chm entries.
*/
public class ChmExtractor {
- private List<ChmLzxBlock> lzxBlocksCache = null;
- private ChmDirectoryListingSet chmDirList = null;
- private ChmItsfHeader chmItsfHeader = null;
- private ChmItspHeader chmItspHeader = null;
- private ChmLzxcResetTable chmLzxcResetTable = null;
- private ChmLzxcControlData chmLzxcControlData = null;
- private byte[] data = null;
- private int indexOfContent;
- private long lzxBlockOffset;
- private long lzxBlockLength;
-
- /**
- * Returns lzxc control data.
- *
- * @return ChmLzxcControlData
- */
- private ChmLzxcControlData getChmLzxcControlData() {
- return chmLzxcControlData;
- }
-
- /**
- * Sets lzxc control data
- *
- * @param chmLzxcControlData
- */
- private void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
- this.chmLzxcControlData = chmLzxcControlData;
- }
-
- private ChmItspHeader getChmItspHeader() {
- return chmItspHeader;
- }
-
- private void setChmItspHeader(ChmItspHeader chmItspHeader) {
- this.chmItspHeader = chmItspHeader;
- }
-
- /**
- * Returns lzxc reset table
- *
- * @return ChmLzxcResetTable
- */
- private ChmLzxcResetTable getChmLzxcResetTable() {
- return chmLzxcResetTable;
- }
-
- /**
- * Sets lzxc reset table
- *
- * @param chmLzxcResetTable
- */
- private void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
- this.chmLzxcResetTable = chmLzxcResetTable;
- }
-
- /**
- * Returns lzxc block length
- *
- * @return lzxBlockLength
- */
- private long getLzxBlockLength() {
- return lzxBlockLength;
- }
-
- /**
- * Sets lzxc block length
- *
- * @param lzxBlockLength
- */
- private void setLzxBlockLength(long lzxBlockLength) {
- this.lzxBlockLength = lzxBlockLength;
- }
-
- /**
- * Returns lzxc block offset
- *
- * @return lzxBlockOffset
- */
- private long getLzxBlockOffset() {
- return lzxBlockOffset;
- }
-
- /**
- * Sets lzxc block offset
- */
- private void setLzxBlockOffset(long lzxBlockOffset) {
- this.lzxBlockOffset = lzxBlockOffset;
- }
-
- private int getIndexOfContent() {
- return indexOfContent;
- }
-
- private void setIndexOfContent(int indexOfContent) {
- this.indexOfContent = indexOfContent;
- }
-
- private byte[] getData() {
- return data;
- }
-
- private void setData(byte[] data) {
- this.data = data;
- }
-
- public ChmExtractor(InputStream is) {
- ChmAssert.assertInputStreamNotNull(is);
- try {
- setData(ChmCommons.toByteArray(is));
-
- /* Creates and parses chm itsf header */
- setChmItsfHeader(new ChmItsfHeader());
- getChmItsfHeader().parse(
- Arrays.copyOfRange(getData(), 0,
- ChmConstants.CHM_ITSF_V3_LEN - 1),
- getChmItsfHeader());
-
- /* Creates and parses chm itsp header */
- setChmItspHeader(new ChmItspHeader());
- getChmItspHeader().parse(
- Arrays.copyOfRange(getData(), (int) getChmItsfHeader()
- .getDirOffset(), (int) getChmItsfHeader()
- .getDirOffset() + ChmConstants.CHM_ITSP_V1_LEN),
- getChmItspHeader());
-
- /* Creates instance of ChmDirListingContainer */
- setChmDirList(new ChmDirectoryListingSet(getData(),
- getChmItsfHeader(), getChmItspHeader()));
-
- int indexOfControlData = getChmDirList().getControlDataIndex();
- int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
- ChmConstants.LZXC.getBytes());
- byte[] dir_chunk = null;
- if (indexOfResetData > 0)
- dir_chunk = Arrays.copyOfRange(
- getData(),
- indexOfResetData,
- indexOfResetData
- + getChmDirList()
- .getDirectoryListingEntryList()
- .get(indexOfControlData).getLength());
-
- /* Creates and parses chm control data */
- setChmLzxcControlData(new ChmLzxcControlData());
- getChmLzxcControlData().parse(dir_chunk, getChmLzxcControlData());
-
- int indexOfResetTable = getChmDirList().getResetTableIndex();
- setChmLzxcResetTable(new ChmLzxcResetTable());
-
- int startIndex = (int) getChmDirList().getDataOffset()
- + getChmDirList().getDirectoryListingEntryList()
- .get(indexOfResetTable).getOffset();
-
- // assert startIndex < data.length
- ChmAssert.assertCopyingDataIndex(startIndex, getData().length);
-
- dir_chunk = Arrays.copyOfRange(getData(), startIndex,
- startIndex
- + getChmDirList().getDirectoryListingEntryList()
- .get(indexOfResetTable).getLength());
-
- getChmLzxcResetTable().parse(dir_chunk, getChmLzxcResetTable());
-
- setIndexOfContent(ChmCommons.indexOf(getChmDirList()
- .getDirectoryListingEntryList(), ChmConstants.CONTENT));
- setLzxBlockOffset((getChmDirList().getDirectoryListingEntryList()
- .get(getIndexOfContent()).getOffset() + getChmItsfHeader()
- .getDataOffset()));
- setLzxBlockLength(getChmDirList().getDirectoryListingEntryList()
- .get(getIndexOfContent()).getLength());
-
- setLzxBlocksCache(new ArrayList<ChmLzxBlock>());
-
- } catch (IOException e) {
- System.err.println(e.getMessage());
- }
- }
-
- /**
- * Enumerates chm entities
- *
- * @return list of chm entities
- */
- public List<String> enumerateChm() {
- List<String> listOfEntries = new ArrayList<String>();
- for (Iterator<DirectoryListingEntry> it = getChmDirList()
- .getDirectoryListingEntryList().iterator(); it.hasNext();) {
- listOfEntries.add(it.next().getName());
- }
- return listOfEntries;
- }
-
- /**
- * Decompresses a chm entry
- *
- * @param directoryListingEntry
- *
- * @return decompressed data
- */
- public byte[][] extractChmEntry(DirectoryListingEntry directoryListingEntry) {
- byte[][] tmp = null;
- byte[] dataSegment = null;
- ChmLzxBlock lzxBlock = null;
- try {
- /* UNCOMPRESSED type is easiest one */
- if (directoryListingEntry.getEntryType() == EntryType.UNCOMPRESSED
- && directoryListingEntry.getLength() > 0
- && !ChmCommons.hasSkip(directoryListingEntry)) {
- int dataOffset = (int) (getChmItsfHeader().getDataOffset() + directoryListingEntry
- .getOffset());
- dataSegment = Arrays.copyOfRange(getData(), dataOffset,
- dataOffset + directoryListingEntry.getLength());
- } else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED
- && !ChmCommons.hasSkip(directoryListingEntry)) {
- /* Gets a chm block info */
- ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance(
- directoryListingEntry, (int) getChmLzxcResetTable()
- .getBlockLen(), getChmLzxcControlData());
- tmp = new byte[bb.getEndBlock() - bb.getStartBlock() + 1][];
-
- int i = 0, start = 0, block = 0;
-
- if ((getLzxBlockLength() < Integer.MAX_VALUE)
- && (getLzxBlockOffset() < Integer.MAX_VALUE)) {
- // TODO: Improve the caching
- // caching ... = O(n^2) - depends on startBlock and endBlock
- if (getLzxBlocksCache().size() != 0) {
- for (i = 0; i < getLzxBlocksCache().size(); i++) {
- lzxBlock = getLzxBlocksCache().get(i);
- for (int j = bb.getIniBlock(); j <= bb
- .getStartBlock(); j++) {
- if (lzxBlock.getBlockNumber() == j)
- if (j > start) {
- start = j;
- block = i;
- }
- if (start == bb.getStartBlock())
- break;
- }
- }
- }
-
- if (i == getLzxBlocksCache().size() && i == 0) {
- start = bb.getIniBlock();
-
- dataSegment = ChmCommons.getChmBlockSegment(getData(),
- getChmLzxcResetTable(), start,
- (int) getLzxBlockOffset(),
- (int) getLzxBlockLength());
-
- lzxBlock = new ChmLzxBlock(start, dataSegment,
- getChmLzxcResetTable().getBlockLen(), null);
-
- getLzxBlocksCache().add(lzxBlock);
- } else {
- lzxBlock = getLzxBlocksCache().get(block);
- }
-
- for (i = start; i <= bb.getEndBlock();) {
- if (i == bb.getStartBlock() && i == bb.getEndBlock()) {
- dataSegment = lzxBlock.getContent(
- bb.getStartOffset(), bb.getEndOffset());
- tmp[0] = dataSegment;
- break;
- }
-
- if (i == bb.getStartBlock()) {
- dataSegment = lzxBlock.getContent(bb
- .getStartOffset());
- tmp[0] = dataSegment;
- }
-
- if (i > bb.getStartBlock() && i < bb.getEndBlock()) {
- dataSegment = lzxBlock.getContent();
- tmp[i - bb.getStartBlock()] = dataSegment;
- }
-
- if (i == bb.getEndBlock()) {
- dataSegment = lzxBlock.getContent(0,
- bb.getEndOffset());
- tmp[i - bb.getStartBlock()] = dataSegment;
- break;
- }
-
- i++;
-
- if (i % getChmLzxcControlData().getResetInterval() == 0) {
- lzxBlock = new ChmLzxBlock(i,
- ChmCommons.getChmBlockSegment(getData(),
- getChmLzxcResetTable(), i,
- (int) getLzxBlockOffset(),
- (int) getLzxBlockLength()),
- getChmLzxcResetTable().getBlockLen(), null);
- } else {
- lzxBlock = new ChmLzxBlock(i,
- ChmCommons.getChmBlockSegment(getData(),
- getChmLzxcResetTable(), i,
- (int) getLzxBlockOffset(),
- (int) getLzxBlockLength()),
- getChmLzxcResetTable().getBlockLen(),
- lzxBlock);
- }
-
- getLzxBlocksCache().add(lzxBlock);
- }
-
- if (getLzxBlocksCache().size() > getChmLzxcResetTable()
- .getBlockCount()) {
- getLzxBlocksCache().clear();
- }
- }
- }
- } catch (ChmParsingException e) {
- // e.printStackTrace();
- // System.err.println("Unknown exception");
- }
- return (tmp != null) ? tmp : (new byte[1][]);
- }
-
- private void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
- this.lzxBlocksCache = lzxBlocksCache;
- }
-
- private List<ChmLzxBlock> getLzxBlocksCache() {
- return lzxBlocksCache;
- }
-
- private void setChmDirList(ChmDirectoryListingSet chmDirList) {
- this.chmDirList = chmDirList;
- }
-
- public ChmDirectoryListingSet getChmDirList() {
- return chmDirList;
- }
-
- private void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
- this.chmItsfHeader = chmItsfHeader;
- }
-
- private ChmItsfHeader getChmItsfHeader() {
- return chmItsfHeader;
- }
+ private List<ChmLzxBlock> lzxBlocksCache = null;
+ private ChmDirectoryListingSet chmDirList = null;
+ private ChmItsfHeader chmItsfHeader = null;
+ private ChmItspHeader chmItspHeader = null;
+ private ChmLzxcResetTable chmLzxcResetTable = null;
+ private ChmLzxcControlData chmLzxcControlData = null;
+ private byte[] data = null;
+ private int indexOfContent;
+ private long lzxBlockOffset;
+ private long lzxBlockLength;
+
+ /**
+ * Returns lzxc control data.
+ *
+ * @return ChmLzxcControlData
+ */
+ private ChmLzxcControlData getChmLzxcControlData() {
+ return chmLzxcControlData;
+ }
+
+ /**
+ * Sets lzxc control data
+ *
+ * @param chmLzxcControlData
+ */
+ private void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
+ this.chmLzxcControlData = chmLzxcControlData;
+ }
+
+ private ChmItspHeader getChmItspHeader() {
+ return chmItspHeader;
+ }
+
+ private void setChmItspHeader(ChmItspHeader chmItspHeader) {
+ this.chmItspHeader = chmItspHeader;
+ }
+
+ /**
+ * Returns lzxc reset table
+ *
+ * @return ChmLzxcResetTable
+ */
+ private ChmLzxcResetTable getChmLzxcResetTable() {
+ return chmLzxcResetTable;
+ }
+
+ /**
+ * Sets lzxc reset table
+ *
+ * @param chmLzxcResetTable
+ */
+ private void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
+ this.chmLzxcResetTable = chmLzxcResetTable;
+ }
+
+ /**
+ * Returns lzxc block length
+ *
+ * @return lzxBlockLength
+ */
+ private long getLzxBlockLength() {
+ return lzxBlockLength;
+ }
+
+ /**
+ * Sets lzxc block length
+ *
+ * @param lzxBlockLength
+ */
+ private void setLzxBlockLength(long lzxBlockLength) {
+ this.lzxBlockLength = lzxBlockLength;
+ }
+
+ /**
+ * Returns lzxc block offset
+ *
+ * @return lzxBlockOffset
+ */
+ private long getLzxBlockOffset() {
+ return lzxBlockOffset;
+ }
+
+ /**
+ * Sets lzxc block offset
+ */
+ private void setLzxBlockOffset(long lzxBlockOffset) {
+ this.lzxBlockOffset = lzxBlockOffset;
+ }
+
+ private int getIndexOfContent() {
+ return indexOfContent;
+ }
+
+ private void setIndexOfContent(int indexOfContent) {
+ this.indexOfContent = indexOfContent;
+ }
+
+ private byte[] getData() {
+ return data;
+ }
+
+ private void setData(byte[] data) {
+ this.data = data;
+ }
+
+ /**
+  * Reads the whole stream into memory and parses the chm structures needed
+  * for extraction, in this order: ITSF header, ITSP header, directory
+  * listing, LZXC control data and LZXC reset table. It then records the
+  * offset and length of the directory entry matching
+  * {@code ChmConstants.CONTENT} and creates an empty LZX block cache.
+  * <p>
+  * An {@link IOException} thrown inside the try block is only printed to
+  * {@code System.err}; the constructor then returns normally and every
+  * field not yet assigned keeps its default value.
+  *
+  * @param is
+  *            stream holding the chm file; checked first by
+  *            {@code ChmAssert.assertInputStreamNotNull}
+  */
+ public ChmExtractor(InputStream is) {
+ ChmAssert.assertInputStreamNotNull(is);
+ try {
+ setData(ChmCommons.toByteArray(is));
+
+ /* Creates and parses chm itsf header */
+ setChmItsfHeader(new ChmItsfHeader());
+ // ChmCommons.copyOfRange replaces Arrays.copyOfRange (Java 5 support).
+ // NOTE(review): the end index is exclusive, so this slice holds
+ // CHM_ITSF_V3_LEN - 1 bytes -- confirm the "- 1" is intended.
+ getChmItsfHeader().parse(
+ ChmCommons.copyOfRange(getData(), 0,
+ ChmConstants.CHM_ITSF_V3_LEN - 1),
+ getChmItsfHeader());
+
+ /* Creates and parses chm itsp header */
+ setChmItspHeader(new ChmItspHeader());
+ // The ITSP slice starts at the directory offset reported by the ITSF
+ // header and spans CHM_ITSP_V1_LEN bytes.
+ getChmItspHeader().parse(
+ ChmCommons.copyOfRange(getData(), (int) getChmItsfHeader()
+ .getDirOffset(), (int) getChmItsfHeader()
+ .getDirOffset() + ChmConstants.CHM_ITSP_V1_LEN),
+ getChmItspHeader());
+
+ /* Creates instance of ChmDirListingContainer */
+ setChmDirList(new ChmDirectoryListingSet(getData(),
+ getChmItsfHeader(), getChmItspHeader()));
+
+ int indexOfControlData = getChmDirList().getControlDataIndex();
+ // getBytes() without an argument uses the platform default charset.
+ int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
+ ChmConstants.LZXC.getBytes());
+ byte[] dir_chunk = null;
+ // The chunk starts at the position found for the LZXC marker and is as
+ // long as the control-data directory entry.
+ if (indexOfResetData > 0)
+ dir_chunk = ChmCommons.copyOfRange(
+ getData(),
+ indexOfResetData,
+ indexOfResetData
+ + getChmDirList()
+ .getDirectoryListingEntryList()
+ .get(indexOfControlData).getLength());
+ // NOTE(review): when indexOfResetData <= 0, dir_chunk is still null
+ // here and is passed to parse() below as-is -- confirm parse() copes.
+
+ /* Creates and parses chm control data */
+ setChmLzxcControlData(new ChmLzxcControlData());
+ getChmLzxcControlData().parse(dir_chunk, getChmLzxcControlData());
+
+ int indexOfResetTable = getChmDirList().getResetTableIndex();
+ setChmLzxcResetTable(new ChmLzxcResetTable());
+
+ int startIndex = (int) getChmDirList().getDataOffset()
+ + getChmDirList().getDirectoryListingEntryList()
+ .get(indexOfResetTable).getOffset();
+
+ // assert startIndex < data.length
+ ChmAssert.assertCopyingDataIndex(startIndex, getData().length);
+
+ // Reset-table bytes: from startIndex, for the length of its entry.
+ dir_chunk = ChmCommons.copyOfRange(getData(), startIndex,
+ startIndex
+ + getChmDirList().getDirectoryListingEntryList()
+ .get(indexOfResetTable).getLength());
+
+ getChmLzxcResetTable().parse(dir_chunk, getChmLzxcResetTable());
+
+ // NOTE(review): ChmCommons.indexOf returns -1 when no entry matches;
+ // the get(getIndexOfContent()) calls below do not guard against that.
+ setIndexOfContent(ChmCommons.indexOf(getChmDirList()
+ .getDirectoryListingEntryList(), ChmConstants.CONTENT));
+ setLzxBlockOffset((getChmDirList().getDirectoryListingEntryList()
+ .get(getIndexOfContent()).getOffset() + getChmItsfHeader()
+ .getDataOffset()));
+ setLzxBlockLength(getChmDirList().getDirectoryListingEntryList()
+ .get(getIndexOfContent()).getLength());
+
+ setLzxBlocksCache(new ArrayList<ChmLzxBlock>());
+
+ } catch (IOException e) {
+ // NOTE(review): the failure is only reported on stderr; callers get a
+ // partially initialised extractor.
+ System.err.println(e.getMessage());
+ }
+ }
+
+ /**
+  * Collects the names of all entries in the chm directory listing.
+  *
+  * @return a new list with one name per directory listing entry, in the
+  *         iteration order of the listing
+  */
+ public List<String> enumerateChm() {
+     List<String> names = new ArrayList<String>();
+     for (DirectoryListingEntry entry : getChmDirList().getDirectoryListingEntryList()) {
+         names.add(entry.getName());
+     }
+     return names;
+ }
+
+ /**
+  * Extracts the content of a single chm entry.
+  * <p>
+  * An uncompressed, non-empty entry is copied straight out of the file
+  * data and returned as a single piece. A compressed entry is decoded
+  * through {@link ChmLzxBlock} and returned as one piece per LZX block it
+  * spans. Entries for which {@code ChmCommons.hasSkip} is true, empty
+  * uncompressed entries and entries of any other type yield no data.
+  * <p>
+  * A {@link ChmParsingException} raised while decoding is swallowed; the
+  * pieces filled in so far (possibly none) are returned.
+  *
+  * @param directoryListingEntry
+  *            entry to extract
+  *
+  * @return the entry content split into pieces, or {@code new byte[1][]}
+  *         (a single {@code null} slot) when nothing was extracted
+  */
+ public byte[][] extractChmEntry(DirectoryListingEntry directoryListingEntry) {
+     byte[][] tmp = null;
+     byte[] dataSegment = null;
+     ChmLzxBlock lzxBlock = null;
+     try {
+         /* UNCOMPRESSED type is easiest one */
+         if (directoryListingEntry.getEntryType() == EntryType.UNCOMPRESSED
+                 && directoryListingEntry.getLength() > 0
+                 && !ChmCommons.hasSkip(directoryListingEntry)) {
+             int dataOffset = (int) (getChmItsfHeader().getDataOffset() + directoryListingEntry
+                     .getOffset());
+             dataSegment = ChmCommons.copyOfRange(getData(), dataOffset,
+                     dataOffset + directoryListingEntry.getLength());
+             // Return the copied bytes as a single piece; previously the
+             // segment was computed but never stored, so it was lost.
+             tmp = new byte[][] { dataSegment };
+         } else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED
+                 && !ChmCommons.hasSkip(directoryListingEntry)) {
+             /* Gets a chm block info */
+             ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance(
+                     directoryListingEntry, (int) getChmLzxcResetTable()
+                             .getBlockLen(), getChmLzxcControlData());
+             // One output slot per block from startBlock to endBlock.
+             tmp = new byte[bb.getEndBlock() - bb.getStartBlock() + 1][];
+
+             int i = 0, start = 0, block = 0;
+
+             // The offset and length are cast to int below, so only
+             // proceed when both fit.
+             if ((getLzxBlockLength() < Integer.MAX_VALUE)
+                     && (getLzxBlockOffset() < Integer.MAX_VALUE)) {
+                 // TODO: Improve the caching
+                 // caching ... = O(n^2) - depends on startBlock and endBlock
+                 // Scan the cache for the block with the highest number in
+                 // [iniBlock, startBlock]; 'start' is that number and
+                 // 'block' its cache index.
+                 if (getLzxBlocksCache().size() != 0) {
+                     for (i = 0; i < getLzxBlocksCache().size(); i++) {
+                         lzxBlock = getLzxBlocksCache().get(i);
+                         for (int j = bb.getIniBlock(); j <= bb
+                                 .getStartBlock(); j++) {
+                             if (lzxBlock.getBlockNumber() == j) {
+                                 if (j > start) {
+                                     start = j;
+                                     block = i;
+                                 }
+                             }
+                             if (start == bb.getStartBlock()) {
+                                 break;
+                             }
+                         }
+                     }
+                 }
+
+                 if (i == getLzxBlocksCache().size() && i == 0) {
+                     // Empty cache: decode from the initial block, without
+                     // a predecessor.
+                     start = bb.getIniBlock();
+
+                     dataSegment = ChmCommons.getChmBlockSegment(getData(),
+                             getChmLzxcResetTable(), start,
+                             (int) getLzxBlockOffset(),
+                             (int) getLzxBlockLength());
+
+                     lzxBlock = new ChmLzxBlock(start, dataSegment,
+                             getChmLzxcResetTable().getBlockLen(), null);
+
+                     getLzxBlocksCache().add(lzxBlock);
+                 } else {
+                     // NOTE(review): if the scan above matched nothing,
+                     // 'block' and 'start' are both still 0 here.
+                     lzxBlock = getLzxBlocksCache().get(block);
+                 }
+
+                 // Walk forward block by block; 'i' is advanced inside the
+                 // body because a new block must be decoded after each step.
+                 for (i = start; i <= bb.getEndBlock();) {
+                     // Entry lies entirely within one block.
+                     if (i == bb.getStartBlock() && i == bb.getEndBlock()) {
+                         dataSegment = lzxBlock.getContent(
+                                 bb.getStartOffset(), bb.getEndOffset());
+                         tmp[0] = dataSegment;
+                         break;
+                     }
+
+                     // First block: from the start offset onwards.
+                     if (i == bb.getStartBlock()) {
+                         dataSegment = lzxBlock.getContent(bb
+                                 .getStartOffset());
+                         tmp[0] = dataSegment;
+                     }
+
+                     // Middle block: whole content.
+                     if (i > bb.getStartBlock() && i < bb.getEndBlock()) {
+                         dataSegment = lzxBlock.getContent();
+                         tmp[i - bb.getStartBlock()] = dataSegment;
+                     }
+
+                     // Last block: up to the end offset, then done.
+                     if (i == bb.getEndBlock()) {
+                         dataSegment = lzxBlock.getContent(0,
+                                 bb.getEndOffset());
+                         tmp[i - bb.getStartBlock()] = dataSegment;
+                         break;
+                     }
+
+                     i++;
+
+                     // On a reset-interval boundary the next block is built
+                     // without a predecessor; otherwise it is chained to
+                     // the block just processed.
+                     if (i % getChmLzxcControlData().getResetInterval() == 0) {
+                         lzxBlock = new ChmLzxBlock(i,
+                                 ChmCommons.getChmBlockSegment(getData(),
+                                         getChmLzxcResetTable(), i,
+                                         (int) getLzxBlockOffset(),
+                                         (int) getLzxBlockLength()),
+                                 getChmLzxcResetTable().getBlockLen(), null);
+                     } else {
+                         lzxBlock = new ChmLzxBlock(i,
+                                 ChmCommons.getChmBlockSegment(getData(),
+                                         getChmLzxcResetTable(), i,
+                                         (int) getLzxBlockOffset(),
+                                         (int) getLzxBlockLength()),
+                                 getChmLzxcResetTable().getBlockLen(),
+                                 lzxBlock);
+                     }
+
+                     getLzxBlocksCache().add(lzxBlock);
+                 }
+
+                 // Drop the cache once it holds more blocks than the reset
+                 // table reports.
+                 if (getLzxBlocksCache().size() > getChmLzxcResetTable()
+                         .getBlockCount()) {
+                     getLzxBlocksCache().clear();
+                 }
+             }
+         }
+     } catch (ChmParsingException e) {
+         // Best effort: a block that fails to parse ends the extraction and
+         // whatever was collected so far is returned.
+     }
+     return (tmp != null) ? tmp : (new byte[1][]);
+ }
+
+ private void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
+ this.lzxBlocksCache = lzxBlocksCache;
+ }
+
+ private List<ChmLzxBlock> getLzxBlocksCache() {
+ return lzxBlocksCache;
+ }
+
+ private void setChmDirList(ChmDirectoryListingSet chmDirList) {
+ this.chmDirList = chmDirList;
+ }
+
+ public ChmDirectoryListingSet getChmDirList() {
+ return chmDirList;
+ }
+
+ private void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
+ this.chmItsfHeader = chmItsfHeader;
+ }
+
+ private ChmItsfHeader getChmItsfHeader() {
+ return chmItsfHeader;
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java Wed Jun 8 21:00:27 2011
@@ -10,121 +10,121 @@ import org.apache.tika.parser.chm.access
import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
public class ChmWrapper {
- private List<ChmLzxBlock> lzxBlocksCache = null;
- private ChmDirectoryListingSet chmDirList = null;
- private ChmItsfHeader chmItsfHeader = null;
- private ChmItspHeader chmItspHeader = null;
- private ChmLzxcResetTable chmLzxcResetTable = null;
- private ChmLzxcControlData chmLzxcControlData = null;
- private byte[] data = null;
- private int indexOfContent;
- private long lzxBlockOffset;
- private long lzxBlockLength;
- private int indexOfResetData;
- private int indexOfResetTable;
- private int startIndex;
-
- protected int getStartIndex() {
- return startIndex;
- }
-
- protected void setStartIndex(int startIndex) {
- this.startIndex = startIndex;
- }
-
- protected int getIndexOfResetTable() {
- return indexOfResetTable;
- }
-
- protected void setIndexOfResetTable(int indexOfResetTable) {
- this.indexOfResetTable = indexOfResetTable;
- }
-
- protected List<ChmLzxBlock> getLzxBlocksCache() {
- return lzxBlocksCache;
- }
-
- protected void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
- this.lzxBlocksCache = lzxBlocksCache;
- }
-
- protected ChmDirectoryListingSet getChmDirList() {
- return chmDirList;
- }
-
- protected void setChmDirList(ChmDirectoryListingSet chmDirList) {
- this.chmDirList = chmDirList;
- }
-
- protected ChmItsfHeader getChmItsfHeader() {
- return chmItsfHeader;
- }
-
- protected void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
- this.chmItsfHeader = chmItsfHeader;
- }
-
- protected ChmLzxcResetTable getChmLzxcResetTable() {
- return chmLzxcResetTable;
- }
-
- protected void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
- this.chmLzxcResetTable = chmLzxcResetTable;
- }
-
- protected ChmLzxcControlData getChmLzxcControlData() {
- return chmLzxcControlData;
- }
-
- protected void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
- this.chmLzxcControlData = chmLzxcControlData;
- }
-
- protected byte[] getData() {
- return data;
- }
-
- protected void setData(byte[] data) {
- this.data = data;
- }
-
- protected int getIndexOfContent() {
- return indexOfContent;
- }
-
- protected void setIndexOfContent(int indexOfContent) {
- this.indexOfContent = indexOfContent;
- }
-
- protected long getLzxBlockOffset() {
- return lzxBlockOffset;
- }
-
- protected void setLzxBlockOffset(long lzxBlockOffset) {
- this.lzxBlockOffset = lzxBlockOffset;
- }
-
- protected long getLzxBlockLength() {
- return lzxBlockLength;
- }
-
- protected void setLzxBlockLength(long lzxBlockLength) {
- this.lzxBlockLength = lzxBlockLength;
- }
-
- protected void setChmItspHeader(ChmItspHeader chmItspHeader) {
- this.chmItspHeader = chmItspHeader;
- }
-
- protected ChmItspHeader getChmItspHeader() {
- return chmItspHeader;
- }
-
- protected void setIndexOfResetData(int indexOfResetData) {
- this.indexOfResetData = indexOfResetData;
- }
-
- protected int getIndexOfResetData() {
- return indexOfResetData;
- }
+ private List<ChmLzxBlock> lzxBlocksCache = null;
+ private ChmDirectoryListingSet chmDirList = null;
+ private ChmItsfHeader chmItsfHeader = null;
+ private ChmItspHeader chmItspHeader = null;
+ private ChmLzxcResetTable chmLzxcResetTable = null;
+ private ChmLzxcControlData chmLzxcControlData = null;
+ private byte[] data = null;
+ private int indexOfContent;
+ private long lzxBlockOffset;
+ private long lzxBlockLength;
+ private int indexOfResetData;
+ private int indexOfResetTable;
+ private int startIndex;
+
+ protected int getStartIndex() {
+ return startIndex;
+ }
+
+ protected void setStartIndex(int startIndex) {
+ this.startIndex = startIndex;
+ }
+
+ protected int getIndexOfResetTable() {
+ return indexOfResetTable;
+ }
+
+ protected void setIndexOfResetTable(int indexOfResetTable) {
+ this.indexOfResetTable = indexOfResetTable;
+ }
+
+ protected List<ChmLzxBlock> getLzxBlocksCache() {
+ return lzxBlocksCache;
+ }
+
+ protected void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
+ this.lzxBlocksCache = lzxBlocksCache;
+ }
+
+ protected ChmDirectoryListingSet getChmDirList() {
+ return chmDirList;
+ }
+
+ protected void setChmDirList(ChmDirectoryListingSet chmDirList) {
+ this.chmDirList = chmDirList;
+ }
+
+ protected ChmItsfHeader getChmItsfHeader() {
+ return chmItsfHeader;
+ }
+
+ protected void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
+ this.chmItsfHeader = chmItsfHeader;
+ }
+
+ protected ChmLzxcResetTable getChmLzxcResetTable() {
+ return chmLzxcResetTable;
+ }
+
+ protected void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
+ this.chmLzxcResetTable = chmLzxcResetTable;
+ }
+
+ protected ChmLzxcControlData getChmLzxcControlData() {
+ return chmLzxcControlData;
+ }
+
+ protected void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
+ this.chmLzxcControlData = chmLzxcControlData;
+ }
+
+ protected byte[] getData() {
+ return data;
+ }
+
+ protected void setData(byte[] data) {
+ this.data = data;
+ }
+
+ protected int getIndexOfContent() {
+ return indexOfContent;
+ }
+
+ protected void setIndexOfContent(int indexOfContent) {
+ this.indexOfContent = indexOfContent;
+ }
+
+ protected long getLzxBlockOffset() {
+ return lzxBlockOffset;
+ }
+
+ protected void setLzxBlockOffset(long lzxBlockOffset) {
+ this.lzxBlockOffset = lzxBlockOffset;
+ }
+
+ protected long getLzxBlockLength() {
+ return lzxBlockLength;
+ }
+
+ protected void setLzxBlockLength(long lzxBlockLength) {
+ this.lzxBlockLength = lzxBlockLength;
+ }
+
+ protected void setChmItspHeader(ChmItspHeader chmItspHeader) {
+ this.chmItspHeader = chmItspHeader;
+ }
+
+ protected ChmItspHeader getChmItspHeader() {
+ return chmItspHeader;
+ }
+
+ protected void setIndexOfResetData(int indexOfResetData) {
+ this.indexOfResetData = indexOfResetData;
+ }
+
+ protected int getIndexOfResetData() {
+ return indexOfResetData;
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java Wed Jun 8 21:00:27 2011
@@ -17,13 +17,13 @@
package org.apache.tika.parser.chm.exception;
public class ChmParsingException extends RuntimeException {
- private static final long serialVersionUID = 6497936044733665210L;
+ private static final long serialVersionUID = 6497936044733665210L;
- public ChmParsingException() {
- super();
- }
+ public ChmParsingException() {
+ super();
+ }
- public ChmParsingException(String description) {
- super(description);
- }
+ public ChmParsingException(String description) {
+ super(description);
+ }
}