You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/06/07 17:44:42 UTC
svn commit: r1133047 [2/3] - in /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm: accessor/ assertion/ core/ exception/ lzx/

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.assertion;
+
+import java.io.InputStream;
+
+import org.apache.tika.parser.chm.accessor.ChmAccessor;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+/**
+ * Contains chm extractor assertions
+ */
+public class ChmAssert {
+	/**
+	 * Checks the validity of the chmBlockSegment parameters; the first
+	 * invalid one raises a ChmParsingException
+	 * 
+	 * @param data
+	 *            must be non-null and non-empty
+	 * @param resetTable
+	 *            must be non-null with more than one block address
+	 * @param blockNumber
+	 *            must not be negative
+	 * @param lzxcBlockOffset
+	 *            must not be negative
+	 * @param lzxcBlockLength must not be negative
+	 */
+	public static final void assertChmBlockSegment(byte[] data,
+			ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset,
+			int lzxcBlockLength) {
+		if ((data == null))
+			throw new ChmParsingException("data[] is null");
+
+		if ((data.length <= 0))
+			throw new ChmParsingException(
+					"data[] length should be greater than zero");
+
+		if (resetTable == null)
+			throw new ChmParsingException("resetTable is null");
+
+		if (resetTable.getBlockAddress().length <= 1)
+			throw new ChmParsingException(
+					"resetTable.getBlockAddress().length should be greater than one");
+
+		if (blockNumber < 0)
+			throw new ChmParsingException(
+					"blockNumber should not be negative");
+
+		if (lzxcBlockOffset < 0)
+			throw new ChmParsingException(
+					"lzxcBlockOffset should not be negative");
+
+		if (lzxcBlockLength < 0)
+			throw new ChmParsingException(
+					"lzxcBlockLength should not be negative");
+	}
+
+	/**
+	 * Checks that the InputStream is not null
+	 * 
+	 * @param is
+	 *            stream to check; a null value raises ChmParsingException
+	 */
+	public static final void assertInputStreamNotNull(InputStream is) {
+		if (is == null)
+			throw new ChmParsingException("input stream is null");
+	}
+
+	/**
+	 * Checks validity of ChmAccessor parameters
+	 * 
+	 * @param data must not be null
+	 * @param chmAccessor must not be null
+	 * @param count currently not checked by this method
+	 */
+	public static final void assertChmAccessorParameters(byte[] data,
+			ChmAccessor<?> chmAccessor, int count) {
+		assertByteArrayNotNull(data);
+		assertChmAccessorNotNull(chmAccessor);
+	}
+
+	/**
+	 * Rejects a missing byte array with a ChmParsingException
+	 * @param data array that must not be null
+	 */
+	public static final void assertByteArrayNotNull(byte[] data) {
+		if (null == data) {
+			throw new ChmParsingException("byte[] data is null");
+		}
+	}
+
+	/**
+	 * Rejects a missing accessor with a ChmParsingException
+	 * @param chmAccessor accessor that must not be null
+	 */
+	public static final void assertChmAccessorNotNull(ChmAccessor<?> chmAccessor) {
+		if (null == chmAccessor) {
+			throw new ChmParsingException("chm header is null");
+		}
+	}
+
+	/**
+	 * Validates the fields of a DirectoryListingEntry; the first invalid
+	 * field raises a ChmParsingException
+	 * @param name_length length of the chm entry name, must not be negative
+	 * @param name chm entry name, must not be null
+	 * @param entryType
+	 *            must be EntryType.COMPRESSED or EntryType.UNCOMPRESSED
+	 * @param offset must not be negative
+	 * @param length must not be negative
+	 */
+	public static final void assertDirectoryListingEntry(int name_length,
+			String name, ChmCommons.EntryType entryType, int offset, int length) {
+		if (name_length < 0) {
+			throw new ChmParsingException("invalid name length");
+		}
+		if (name == null) {
+			throw new ChmParsingException("invalid name");
+		}
+		boolean knownType = (entryType == ChmCommons.EntryType.COMPRESSED)
+				|| (entryType == ChmCommons.EntryType.UNCOMPRESSED);
+		if (!knownType) {
+			throw new ChmParsingException(
+					"invalid compressed type, should be EntryType.COMPRESSED | EntryType.UNCOMPRESSED");
+		}
+		if (offset < 0) {
+			throw new ChmParsingException("invalid offset");
+		}
+		if (length < 0) {
+			throw new ChmParsingException("invalid length");
+		}
+	}
+
+	/** Checks that index is a valid position in data of dataLength bytes, else throws ChmParsingException */
+	public static void assertCopyingDataIndex(int index, int dataLength) {
+		if (index < 0 || index >= dataLength)
+			throw new ChmParsingException("cannot parse chm file index " + index + " is outside data.length " + dataLength);
+	}
+
+	/**
+	 * Checks that an int parameter is greater than zero
+	 * 
+	 * @param param
+	 *            value to check; param <= 0 raises ChmParsingException
+	 */
+	public static void assertPositiveInt(int param) {
+		if (param <= 0)
+			throw new ChmParsingException(
+					"parameter should be greater than zero, but was " + param);
+	}
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,374 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.core;
+
+import java.io.ByteArrayOutputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+public class ChmCommons {
+	/* Prevents initialization */
+	private ChmCommons() {
+	}
+
+	/** Throws ChmParsingException when data is null */
+	public static void assertByteArrayNotNull(byte[] data) {
+		if (null == data) throw new ChmParsingException("byte[] is null");
+	}
+
+	/**
+	 * Represents entry types: uncompressed, compressed
+	 */
+	public enum EntryType {
+		UNCOMPRESSED, COMPRESSED
+	}
+
+	/**
+	 * Represents lzx states: started decoding, not started decoding
+	 */
+	public enum LzxState {
+		STARTED_DECODING, NOT_STARTED_DECODING
+	}
+
+	/**
+	 * Represents intel file states during decompression
+	 */
+	public enum IntelState {
+		STARTED, NOT_STARTED
+	}
+
+	/**
+	 * Represents lzx block types in order to decompress differently
+	 */
+	public final static int UNDEFINED = 0;
+	public final static int VERBATIM = 1;
+	public final static int ALIGNED_OFFSET = 2;
+	public final static int UNCOMPRESSED = 3;
+
+	/**
+	 * LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb). Returns X
+	 * for a window of 2^X by counting how often the value can be halved
+	 * 
+	 * @param window
+	 *            chmLzxControlData.getWindowSize()
+	 * 
+	 * @return X, the exponent; 0 for a window of 1 or less
+	 */
+	public static int getWindowSize(int window) {
+		int exponent = 0;
+		/* One halving per step until the value drops to 1 */
+		for (int rest = window; rest > 1; rest >>>= 1) {
+			exponent++;
+		}
+		return exponent;
+	}
+
+	/**
+	 * Copies the bytes of one compressed block out of the chm data. A
+	 * blockNumber past the end of the reset table yields an empty array
+	 * 
+	 * @return the block bytes, never null
+	 */
+	public static byte[] getChmBlockSegment(byte[] data,
+			ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset,
+			int lzxcBlockLength) {
+		ChmAssert.assertChmBlockSegment(data, resetTable, blockNumber,
+				lzxcBlockOffset, lzxcBlockLength);
+		/* Out-of-range block: nothing to copy (used to fail on the index) */
+		if (blockNumber >= resetTable.getBlockAddress().length)
+			return new byte[0];
+		int blockLength;
+		// TODO add int_max_value checking
+		if (blockNumber < (resetTable.getBlockAddress().length - 1)) {
+			blockLength = (int) (resetTable.getBlockAddress()[blockNumber + 1] - resetTable
+					.getBlockAddress()[blockNumber]);
+		} else {
+			/* Last block: its length is lzxcBlockLength minus its address */
+			blockLength = (int) (lzxcBlockLength - resetTable
+					.getBlockAddress()[blockNumber]);
+		}
+		int from = (int) (lzxcBlockOffset + resetTable.getBlockAddress()[blockNumber]);
+		return Arrays.copyOfRange(data, from, from + blockLength);
+	}
+
+	/**
+	 * Returns textual representation of LangID
+	 * 
+	 * @param langID
+	 *            numeric language id, e.g. 1033
+	 * @return language name, or an "unknown - ..." hint for unlisted ids
+	 */
+	public static String getLanguage(long langID) {
+		/* Ids outside the int range must not alias a listed id when narrowed */
+		switch ((langID == (int) langID) ? (int) langID : -1) {
+		case 1025:
+			return "Arabic";
+		case 1069:
+			return "Basque";
+		case 1027:
+			return "Catalan";
+		case 2052:
+			return "Chinese (Simplified)";
+		case 1028:
+			return "Chinese (Traditional)";
+		case 1029:
+			return "Czech";
+		case 1030:
+			return "Danish";
+		case 1043:
+			return "Dutch";
+		case 1033:
+			return "English (United States)";
+		case 1035:
+			return "Finnish";
+		case 1036:
+			return "French";
+		case 1031:
+			return "German";
+		case 1032:
+			return "Greek";
+		case 1037:
+			return "Hebrew";
+		case 1038:
+			return "Hungarian";
+		case 1040:
+			return "Italian";
+		case 1041:
+			return "Japanese";
+		case 1042:
+			return "Korean";
+		case 1044:
+			return "Norwegian";
+		case 1045:
+			return "Polish";
+		case 2070:
+			return "Portuguese";
+		case 1046:
+			return "Portuguese (Brazil)";
+		case 1049:
+			return "Russian";
+		case 1051:
+			return "Slovakian";
+		case 1060:
+			return "Slovenian";
+		case 3082:
+			return "Spanish";
+		case 1053:
+			return "Swedish";
+		case 1055:
+			return "Turkish";
+		default:
+			return "unknown - http://msdn.microsoft.com/en-us/library/bb165625%28VS.80%29.aspx";
+		}
+	}
+
+	/**
+	 * Tells whether an entry name matches one of the skippable prefixes
+	 * ("/$", "/#" or "::")
+	 * 
+	 * @param directoryListingEntry entry whose name is inspected
+	 * @return true when the name starts with one of the prefixes
+	 */
+	public static boolean hasSkip(DirectoryListingEntry directoryListingEntry) {
+		String entryName = directoryListingEntry.getName();
+		return entryName.startsWith("/$") || entryName.startsWith("/#")
+				|| entryName.startsWith("::");
+	}
+
+	/**
+	 * Writes byte[][] to the file, row by row. Null rows are skipped. Does
+	 * nothing when buffer is null or fileToBeSaved is null or empty. I/O
+	 * failures are reported on System.err and not propagated
+	 * 
+	 * @param buffer rows to write in order
+	 * @param fileToBeSaved file name
+	 */
+	public static void writeFile(byte[][] buffer, String fileToBeSaved) {
+		if (buffer == null || fileToBeSaved == null || fileToBeSaved.isEmpty())
+			return;
+		FileOutputStream output = null;
+		try {
+			output = new FileOutputStream(fileToBeSaved);
+			for (int i = 0; i < buffer.length; i++) {
+				/* Rows may be null, e.g. in a new byte[1][] fallback result */
+				if (buffer[i] != null)
+					output.write(buffer[i]);
+			}
+		} catch (FileNotFoundException e) {
+			System.err.println("The " + fileToBeSaved + " does not seem correct");
+		} catch (IOException e) {
+			e.printStackTrace();
+		} finally {
+			if (output != null)
+				try {
+					output.close();
+				} catch (IOException e) {
+					e.printStackTrace();
+				}
+		}
+	}
+
+	/**
+	 * Reverses the order of given array in place; a null array is ignored
+	 * 
+	 * @param array
+	 *            bytes to reverse, may be null
+	 */
+	public static void reverse(byte[] array) {
+		if (array == null) {
+			return;
+		}
+
+		/* Swap the outermost pair, then move both ends inwards */
+		int lo = 0;
+		int hi = array.length - 1;
+		for (; lo < hi; lo++, hi--) {
+			byte held = array[lo];
+			array[lo] = array[hi];
+			array[hi] = held;
+		}
+	}
+
+	/**
+	 * Returns byte array Closes the InputStream
+	 * 
+	 * @param is
+	 *            InputStream of chm file
+	 * 
+	 * @return byte array
+	 * 
+	 * @throws IOException
+	 */
+	public static byte[] toByteArray(InputStream is) throws IOException {
+		if (is != null) {
+			ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+			int nRead;
+			byte[] data = new byte[16384];
+			while ((nRead = is.read(data, 0, data.length)) != -1) {
+				buffer.write(data, 0, nRead);
+			}
+			buffer.flush();
+			try {
+				is.close();
+				buffer.close();
+			} catch (Exception e) {
+				System.err.println(e.getMessage());
+			}
+			return buffer.toByteArray();
+		} else
+			throw new ChmParsingException("InputStream is null");
+	}
+
+	/**
+	 * Returns an index of the reset table
+	 * 
+	 * @param text
+	 * @param pattern
+	 * @return index of the reset table
+	 */
+	public static final int indexOfResetTableBlock(byte[] text, byte[] pattern) {
+		return (indexOf(text, pattern)) - 4;
+	}
+
+	/**
+	 * Searches some pattern in byte[]
+	 * 
+	 * @param text
+	 *            byte[] to search in, must not be null
+	 * @param pattern
+	 *            byte[] to look for, must not be null
+	 * @return index of the first match (0 for an empty pattern), else -1
+	 */
+	public static int indexOf(byte[] text, byte[] pattern) {
+		if (pattern == null || text == null)
+			throw new ChmParsingException(
+					"pattern and/or text should not be null");
+		/* An empty pattern has no failure table; it matches at the start */
+		if (pattern.length == 0)
+			return 0;
+		/* Preprocessing */
+		int[] next = new int[pattern.length];
+		next[0] = -1;
+		int i = 0, j = -1;
+
+		/* Computes a failure function */
+		while (i < pattern.length - 1) {
+			if (j == -1 || pattern[i] == pattern[j]) {
+				i++;
+				j++;
+				if (pattern[i] != pattern[j])
+					next[i] = j;
+				else
+					next[i] = next[j];
+			} else
+				j = next[j];
+		}
+
+		/* Reinitializes local variables */
+		i = j = 0;
+
+		/* Matching */
+		while (i < text.length && j < pattern.length) {
+			if (j == -1 || pattern[j] == text[i]) {
+				i++;
+				j++;
+			} else
+				j = next[j];
+		}
+		if (j == pattern.length)
+			return (i - j); // match found at offset i - M
+		else
+			return -1; // not found
+	}
+
+	/**
+	 * Finds the first directory listing entry whose toString() text
+	 * contains the pattern
+	 * 
+	 * @param list
+	 *            entries to scan in order
+	 * @param pattern
+	 *            text to look for
+	 * @return position of the first such entry, if nothing found returns -1
+	 */
+	public static int indexOf(List<DirectoryListingEntry> list, String pattern) {
+		int position = 0;
+		for (DirectoryListingEntry entry : list) {
+			if (entry.toString().contains(pattern))
+				return position;
+			position++;
+		}
+		return -1;// not found
+	}
+
+	/**
+	 * @param args ignored; this entry point has an empty body
+	 */
+	public static void main(String[] args) {
+	}
+
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.core;
+
+/**
+ * Chm parser constants; array constants are final references with writable elements
+ */
+public class ChmConstants {
+	/* Prevents instantiation */
+	private ChmConstants() {
+	}
+
+	public static final String DEFAULT_CHARSET = "UTF-8";
+	public static final String ITSF = "ITSF";
+	public static final String ITSP = "ITSP";
+	public static final String PMGL = "PMGL";
+	public static final String LZXC = "LZXC";
+	public static final String CHM_PMGI_MARKER = "PMGI";
+	public static final int BYTE_ARRAY_LENGHT = 16;
+	public static final int CHM_ITSF_V2_LEN = 0x58;
+	public static final int CHM_ITSF_V3_LEN = 0x60;
+	public static final int CHM_ITSP_V1_LEN = 0x54;
+	public static final int CHM_PMGL_LEN = 0x14;
+	public static final int CHM_PMGI_LEN = 0x08;
+	public static final int CHM_LZXC_RESETTABLE_V1_LEN = 0x28;
+	public static final int CHM_LZXC_MIN_LEN = 0x18;
+	public static final int CHM_LZXC_V2_LEN = 0x1c;
+	public static final int CHM_SIGNATURE_LEN = 4;
+	public static final int CHM_VER_2 = 2;
+	public static final int CHM_VER_3 = 3;
+	public static final int CHM_VER_1 = 1;
+	public static final int CHM_WINDOW_SIZE_BLOCK = 0x8000;
+
+	/* my hacking */
+	public static final int START_PMGL = 0xCC;
+	public static final String CONTROL_DATA = "ControlData";
+	public static final String RESET_TABLE = "ResetTable";
+	public static final String CONTENT = "Content";
+
+	/* some constants defined by the LZX specification */
+	public static final int LZX_MIN_MATCH = 2;
+	public static final int LZX_MAX_MATCH = 257;
+	public static final int LZX_NUM_CHARS = 256;
+	public static final int LZX_BLOCKTYPE_INVALID = 0; /* also blocktypes 4-7 invalid */
+	public static final int LZX_BLOCKTYPE_VERBATIM = 1;
+	public static final int LZX_BLOCKTYPE_ALIGNED = 2;
+	public static final int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
+	public static final int LZX_PRETREE_NUM_ELEMENTS_BITS = 4; /* ??? */
+	public static final int LZX_PRETREE_NUM_ELEMENTS = 20;
+	public static final int LZX_ALIGNED_NUM_ELEMENTS = 8; /*
+														 * aligned offset tree
+														 * #elements
+														 */
+	public static final int LZX_NUM_PRIMARY_LENGTHS = 7; /*
+														 * this one missing from
+														 * spec!
+														 */
+	public static final int LZX_NUM_SECONDARY_LENGTHS = 249; /*
+															 * length tree
+															 * #elements
+															 */
+
+	/* LZX huffman defines: tweak tablebits as desired */
+	public static final int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
+	public static final int LZX_PRETREE_TABLEBITS = 6;
+	public static final int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
+	public static final int LZX_MAIN_MAXSYMBOLS = LZX_NUM_CHARS * 2;
+	public static final int LZX_MAINTREE_TABLEBITS = 12;
+	public static final int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
+	public static final int LZX_LENGTH_TABLEBITS = 12;
+	public static final int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
+	public static final int LZX_ALIGNED_TABLEBITS = 7;
+	public static final int LZX_LENTABLE_SAFETY = 64;
+
+	public static final short[] EXTRA_BITS = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
+			5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
+			15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+			17, 17 };
+
+	public static final int[] POSITION_BASE = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32,
+			48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072,
+			4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304,
+			131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504,
+			1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 1835008,
+			1966080, 2097152 };
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.core;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+import org.apache.tika.parser.chm.lzx.ChmBlockInfo;
+import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
+
+/**
+ * Extracts text from chm file. Enumerates chm entries.
+ */
+public class ChmExtractor {
+	private List<ChmLzxBlock> lzxBlocksCache = null;
+	private ChmDirectoryListingSet chmDirList = null;
+	private ChmItsfHeader chmItsfHeader = null;
+	private ChmItspHeader chmItspHeader = null;
+	private ChmLzxcResetTable chmLzxcResetTable = null;
+	private ChmLzxcControlData chmLzxcControlData = null;
+	private byte[] data = null;
+	private int indexOfContent;
+	private long lzxBlockOffset;
+	private long lzxBlockLength;
+
+	/**
+	 * Returns lzxc control data.
+	 * 
+	 * @return ChmLzxcControlData
+	 */
+	private ChmLzxcControlData getChmLzxcControlData() {
+		return chmLzxcControlData;
+	}
+
+	/**
+	 * Sets lzxc control data
+	 * 
+	 * @param chmLzxcControlData
+	 */
+	private void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
+		this.chmLzxcControlData = chmLzxcControlData;
+	}
+
+	private ChmItspHeader getChmItspHeader() {
+		return chmItspHeader;
+	}
+
+	private void setChmItspHeader(ChmItspHeader chmItspHeader) {
+		this.chmItspHeader = chmItspHeader;
+	}
+
+	/**
+	 * Returns lzxc reset table
+	 * 
+	 * @return ChmLzxcResetTable
+	 */
+	private ChmLzxcResetTable getChmLzxcResetTable() {
+		return chmLzxcResetTable;
+	}
+
+	/**
+	 * Sets lzxc reset table
+	 * 
+	 * @param chmLzxcResetTable
+	 */
+	private void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
+		this.chmLzxcResetTable = chmLzxcResetTable;
+	}
+
+	/**
+	 * Returns lzxc block length
+	 * 
+	 * @return lzxBlockLength
+	 */
+	private long getLzxBlockLength() {
+		return lzxBlockLength;
+	}
+
+	/**
+	 * Sets lzxc block length
+	 * 
+	 * @param lzxBlockLength
+	 */
+	private void setLzxBlockLength(long lzxBlockLength) {
+		this.lzxBlockLength = lzxBlockLength;
+	}
+
+	/**
+	 * Returns lzxc block offset
+	 * 
+	 * @return lzxBlockOffset
+	 */
+	private long getLzxBlockOffset() {
+		return lzxBlockOffset;
+	}
+
+	/**
+	 * Sets lzxc block offset
+	 */
+	private void setLzxBlockOffset(long lzxBlockOffset) {
+		this.lzxBlockOffset = lzxBlockOffset;
+	}
+
+	private int getIndexOfContent() {
+		return indexOfContent;
+	}
+
+	private void setIndexOfContent(int indexOfContent) {
+		this.indexOfContent = indexOfContent;
+	}
+
+	private byte[] getData() {
+		return data;
+	}
+
+	private void setData(byte[] data) {
+		this.data = data;
+	}
+
+	/**
+	 * Reads the whole chm stream and parses the itsf and itsp headers, the
+	 * directory listing, the lzxc control data and the lzxc reset table
+	 * @param is stream of the chm file, must not be null
+	 * @throws ChmParsingException if is is null or the stream cannot be read
+	 */
+	public ChmExtractor(InputStream is) {
+		ChmAssert.assertInputStreamNotNull(is);
+		try {
+			setData(ChmCommons.toByteArray(is));
+			/* Creates and parses chm itsf header */
+			/* NOTE(review): copies CHM_ITSF_V3_LEN - 1 bytes - confirm the parser needs no more */
+			setChmItsfHeader(new ChmItsfHeader());
+			getChmItsfHeader().parse(
+					Arrays.copyOfRange(getData(), 0,
+							ChmConstants.CHM_ITSF_V3_LEN - 1),
+					getChmItsfHeader());
+
+			/* Creates and parses chm itsp header */
+			setChmItspHeader(new ChmItspHeader());
+			getChmItspHeader().parse(
+					Arrays.copyOfRange(getData(), (int) getChmItsfHeader()
+							.getDirOffset(), (int) getChmItsfHeader()
+							.getDirOffset() + ChmConstants.CHM_ITSP_V1_LEN),
+					getChmItspHeader());
+
+			/* Creates instance of ChmDirListingContainer */
+			setChmDirList(new ChmDirectoryListingSet(getData(),
+					getChmItsfHeader(), getChmItspHeader()));
+			int indexOfControlData = getChmDirList().getControlDataIndex();
+			int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
+					ChmConstants.LZXC.getBytes());
+			byte[] dir_chunk = null;
+			if (indexOfResetData > 0)
+				dir_chunk = Arrays.copyOfRange(
+						getData(),
+						indexOfResetData,
+						indexOfResetData
+								+ getChmDirList()
+										.getDirectoryListingEntryList()
+										.get(indexOfControlData).getLength());
+
+			/* Creates and parses chm control data */
+			setChmLzxcControlData(new ChmLzxcControlData());
+			getChmLzxcControlData().parse(dir_chunk, getChmLzxcControlData());
+
+			int indexOfResetTable = getChmDirList().getResetTableIndex();
+			setChmLzxcResetTable(new ChmLzxcResetTable());
+			int startIndex = (int) getChmDirList().getDataOffset()
+					+ getChmDirList().getDirectoryListingEntryList()
+							.get(indexOfResetTable).getOffset();
+			// assert startIndex < data.length
+			ChmAssert.assertCopyingDataIndex(startIndex, getData().length);
+			dir_chunk = Arrays.copyOfRange(getData(), startIndex,
+					startIndex
+							+ getChmDirList().getDirectoryListingEntryList()
+									.get(indexOfResetTable).getLength());
+			getChmLzxcResetTable().parse(dir_chunk, getChmLzxcResetTable());
+			setIndexOfContent(ChmCommons.indexOf(getChmDirList()
+					.getDirectoryListingEntryList(), ChmConstants.CONTENT));
+			setLzxBlockOffset((getChmDirList().getDirectoryListingEntryList()
+					.get(getIndexOfContent()).getOffset() + getChmItsfHeader()
+					.getDataOffset()));
+			setLzxBlockLength(getChmDirList().getDirectoryListingEntryList()
+					.get(getIndexOfContent()).getLength());
+			setLzxBlocksCache(new ArrayList<ChmLzxBlock>());
+		} catch (IOException e) {
+			ChmParsingException failure = new ChmParsingException(e.getMessage());
+			failure.initCause(e);
+			throw failure;
+		}
+	}
+
+	/**
+	 * Lists the names of all chm entries in directory listing order
+	 * @return names of the chm entries
+	 */
+	public List<String> enumerateChm() {
+		List<DirectoryListingEntry> entries = getChmDirList()
+				.getDirectoryListingEntryList();
+		List<String> names = new ArrayList<String>();
+		for (DirectoryListingEntry entry : entries) {
+			names.add(entry.getName());
+		}
+		return names;
+	}
+
+	/**
+	 * Extracts the content of a chm entry: uncompressed entries are copied
+	 * from the data, compressed entries are decoded block by block
+	 * 
+	 * @param directoryListingEntry entry to extract
+	 * @return content rows; a one-row array with a null row if nothing was extracted
+	 */
+	public byte[][] extractChmEntry(DirectoryListingEntry directoryListingEntry) {
+		byte[][] tmp = null;
+		byte[] dataSegment = null;
+		ChmLzxBlock lzxBlock = null;
+		try {
+			/* UNCOMPRESSED type is easiest one */
+			if (directoryListingEntry.getEntryType() == EntryType.UNCOMPRESSED
+					&& directoryListingEntry.getLength() > 0
+					&& !ChmCommons.hasSkip(directoryListingEntry)) {
+				int dataOffset = (int) (getChmItsfHeader().getDataOffset() + directoryListingEntry
+						.getOffset());
+				dataSegment = Arrays.copyOfRange(getData(), dataOffset,
+						dataOffset + directoryListingEntry.getLength());
+				tmp = new byte[][] { dataSegment };
+			} else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED
+					&& !ChmCommons.hasSkip(directoryListingEntry)) {
+				/* Gets a chm block info */
+				ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance(
+						directoryListingEntry, (int) getChmLzxcResetTable()
+								.getBlockLen(), getChmLzxcControlData());
+				tmp = new byte[bb.getEndBlock() - bb.getStartBlock() + 1][];
+
+				int i = 0, start = 0, block = 0;
+
+				if ((getLzxBlockLength() < Integer.MAX_VALUE)
+						&& (getLzxBlockOffset() < Integer.MAX_VALUE)) {
+					// TODO: Improve the caching
+					// caching ... = O(n^2) - depends on startBlock and endBlock
+					if (getLzxBlocksCache().size() != 0) {
+						for (i = 0; i < getLzxBlocksCache().size(); i++) {
+							lzxBlock = getLzxBlocksCache().get(i);
+							for (int j = bb.getIniBlock(); j <= bb
+									.getStartBlock(); j++) {
+								if (lzxBlock.getBlockNumber() == j)
+									if (j > start) {
+										start = j;
+										block = i;
+									}
+								if (start == bb.getStartBlock())
+									break;
+							}
+						}
+					}
+
+					if (i == getLzxBlocksCache().size() && i == 0) {
+						start = bb.getIniBlock();
+
+						dataSegment = ChmCommons.getChmBlockSegment(getData(),
+								getChmLzxcResetTable(), start,
+								(int) getLzxBlockOffset(),
+								(int) getLzxBlockLength());
+
+						lzxBlock = new ChmLzxBlock(start, dataSegment,
+								getChmLzxcResetTable().getBlockLen(), null);
+
+						getLzxBlocksCache().add(lzxBlock);
+					} else {
+						lzxBlock = getLzxBlocksCache().get(block);
+					}
+
+					for (i = start; i <= bb.getEndBlock();) {
+						if (i == bb.getStartBlock() && i == bb.getEndBlock()) {
+							dataSegment = lzxBlock.getContent(
+									bb.getStartOffset(), bb.getEndOffset());
+							tmp[0] = dataSegment;
+							break;
+						}
+
+						if (i == bb.getStartBlock()) {
+							dataSegment = lzxBlock.getContent(bb
+									.getStartOffset());
+							tmp[0] = dataSegment;
+						}
+
+						if (i > bb.getStartBlock() && i < bb.getEndBlock()) {
+							dataSegment = lzxBlock.getContent();
+							tmp[i - bb.getStartBlock()] = dataSegment;
+						}
+
+						if (i == bb.getEndBlock()) {
+							dataSegment = lzxBlock.getContent(0,
+									bb.getEndOffset());
+							tmp[i - bb.getStartBlock()] = dataSegment;
+							break;
+						}
+
+						i++;
+
+						if (i % getChmLzxcControlData().getResetInterval() == 0) {
+							lzxBlock = new ChmLzxBlock(i,
+									ChmCommons.getChmBlockSegment(getData(),
+											getChmLzxcResetTable(), i,
+											(int) getLzxBlockOffset(),
+											(int) getLzxBlockLength()),
+									getChmLzxcResetTable().getBlockLen(), null);
+						} else {
+							lzxBlock = new ChmLzxBlock(i,
+									ChmCommons.getChmBlockSegment(getData(),
+											getChmLzxcResetTable(), i,
+											(int) getLzxBlockOffset(),
+											(int) getLzxBlockLength()),
+									getChmLzxcResetTable().getBlockLen(),
+									lzxBlock);
+						}
+
+						getLzxBlocksCache().add(lzxBlock);
+					}
+
+					if (getLzxBlocksCache().size() > getChmLzxcResetTable()
+							.getBlockCount()) {
+						getLzxBlocksCache().clear();
+					}
+				}
+			}
+		} catch (ChmParsingException e) {
+			// NOTE(review): parsing failures are dropped; rows filled so far are returned
+		}
+		return (tmp != null) ? tmp : (new byte[1][]);
+	}
+
+	private void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
+		this.lzxBlocksCache = lzxBlocksCache;
+	}
+
+	private List<ChmLzxBlock> getLzxBlocksCache() {
+		return lzxBlocksCache;
+	}
+
+	private void setChmDirList(ChmDirectoryListingSet chmDirList) {
+		this.chmDirList = chmDirList;
+	}
+
+	public ChmDirectoryListingSet getChmDirList() {
+		return chmDirList;
+	}
+
+	private void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
+		this.chmItsfHeader = chmItsfHeader;
+	}
+
+	private ChmItsfHeader getChmItsfHeader() {
+		return chmItsfHeader;
+	}
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,130 @@
+package org.apache.tika.parser.chm.core;
+
+import java.util.List;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
+
+/**
+ * Plain state holder used while a chm file is processed. It groups the parsed
+ * chm structures (itsf/itsp headers, lzxc control data, lzxc reset table,
+ * directory listing set), the raw byte array, a list of lzx blocks and a few
+ * integer/long positions. The class contains no logic: every member is a
+ * protected getter or setter, and nothing is copied on the way in or out.
+ */
+public class ChmWrapper {
+	/* parsed chm structures */
+	private ChmItsfHeader chmItsfHeader;
+	private ChmItspHeader chmItspHeader;
+	private ChmLzxcControlData chmLzxcControlData;
+	private ChmLzxcResetTable chmLzxcResetTable;
+	private ChmDirectoryListingSet chmDirList;
+
+	/* raw bytes and the list of lzx blocks */
+	private byte[] data;
+	private List<ChmLzxBlock> lzxBlocksCache;
+
+	/* positions and sizes */
+	private int startIndex;
+	private int indexOfContent;
+	private int indexOfResetData;
+	private int indexOfResetTable;
+	private long lzxBlockOffset;
+	private long lzxBlockLength;
+
+	/* ---------- parsed chm structures ---------- */
+
+	protected ChmItsfHeader getChmItsfHeader() {
+		return chmItsfHeader;
+	}
+
+	protected void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
+		this.chmItsfHeader = chmItsfHeader;
+	}
+
+	protected ChmItspHeader getChmItspHeader() {
+		return chmItspHeader;
+	}
+
+	protected void setChmItspHeader(ChmItspHeader chmItspHeader) {
+		this.chmItspHeader = chmItspHeader;
+	}
+
+	protected ChmLzxcControlData getChmLzxcControlData() {
+		return chmLzxcControlData;
+	}
+
+	protected void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
+		this.chmLzxcControlData = chmLzxcControlData;
+	}
+
+	protected ChmLzxcResetTable getChmLzxcResetTable() {
+		return chmLzxcResetTable;
+	}
+
+	protected void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
+		this.chmLzxcResetTable = chmLzxcResetTable;
+	}
+
+	protected ChmDirectoryListingSet getChmDirList() {
+		return chmDirList;
+	}
+
+	protected void setChmDirList(ChmDirectoryListingSet chmDirList) {
+		this.chmDirList = chmDirList;
+	}
+
+	/* ---------- raw bytes and lzx blocks ---------- */
+
+	protected byte[] getData() {
+		return data;
+	}
+
+	protected void setData(byte[] data) {
+		this.data = data;
+	}
+
+	protected List<ChmLzxBlock> getLzxBlocksCache() {
+		return lzxBlocksCache;
+	}
+
+	protected void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
+		this.lzxBlocksCache = lzxBlocksCache;
+	}
+
+	/* ---------- positions and sizes ---------- */
+
+	protected int getStartIndex() {
+		return startIndex;
+	}
+
+	protected void setStartIndex(int startIndex) {
+		this.startIndex = startIndex;
+	}
+
+	protected int getIndexOfContent() {
+		return indexOfContent;
+	}
+
+	protected void setIndexOfContent(int indexOfContent) {
+		this.indexOfContent = indexOfContent;
+	}
+
+	protected int getIndexOfResetData() {
+		return indexOfResetData;
+	}
+
+	protected void setIndexOfResetData(int indexOfResetData) {
+		this.indexOfResetData = indexOfResetData;
+	}
+
+	protected int getIndexOfResetTable() {
+		return indexOfResetTable;
+	}
+
+	protected void setIndexOfResetTable(int indexOfResetTable) {
+		this.indexOfResetTable = indexOfResetTable;
+	}
+
+	protected long getLzxBlockOffset() {
+		return lzxBlockOffset;
+	}
+
+	protected void setLzxBlockOffset(long lzxBlockOffset) {
+		this.lzxBlockOffset = lzxBlockOffset;
+	}
+
+	protected long getLzxBlockLength() {
+		return lzxBlockLength;
+	}
+
+	protected void setLzxBlockLength(long lzxBlockLength) {
+		this.lzxBlockLength = lzxBlockLength;
+	}
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.exception;
+
+/**
+ * Unchecked exception thrown by the chm parser when a chm structure cannot be
+ * read or decompressed.
+ * <p>
+ * Besides the original no-argument and message-only constructors, the
+ * exception can wrap the failure that triggered it, so that the original stack
+ * trace is not lost when a lower-level error is translated.
+ */
+public class ChmParsingException extends RuntimeException {
+	private static final long serialVersionUID = 6497936044733665210L;
+
+	/**
+	 * Creates an exception without a message or a cause.
+	 */
+	public ChmParsingException() {
+		super();
+	}
+
+	/**
+	 * Creates an exception with the given description.
+	 * 
+	 * @param description
+	 *            - text returned by {@link #getMessage()}
+	 */
+	public ChmParsingException(String description) {
+		super(description);
+	}
+
+	/**
+	 * Creates an exception with the given description that wraps the failure
+	 * which caused it.
+	 * 
+	 * @param description
+	 *            - text returned by {@link #getMessage()}
+	 * @param cause
+	 *            - underlying failure, may be {@code null}
+	 */
+	public ChmParsingException(String description, Throwable cause) {
+		super(description, cause);
+	}
+
+	/**
+	 * Creates an exception that wraps the failure which caused it.
+	 * 
+	 * @param cause
+	 *            - underlying failure, may be {@code null}
+	 */
+	public ChmParsingException(Throwable cause) {
+		super(cause);
+	}
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.lzx;
+
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+/**
+ * A container that contains chm block information such as: i. initial block is
+ * using to reset main tree ii. start block is using for knowing where to start
+ * iii. end block is using for knowing where to stop iv. start offset is using
+ * for knowing where to start reading v. end offset is using for knowing where
+ * to stop reading
+ * 
+ */
+public class ChmBlockInfo {
+	/* class members */
+	private int iniBlock;
+	private int startBlock;
+	private int endBlock;
+	private int startOffset;
+	private int endOffset;
+
+	private static ChmBlockInfo chmBlockInfo = null;
+
+	private ChmBlockInfo() {
+
+	}
+
+	/**
+	 * Returns an information related to the chmBlockInfo
+	 * 
+	 * @param dle
+	 *            - DirectoryListingEntry
+	 * @param bytesPerBlock
+	 *            - int, = chmLzxcResetTable.block_length
+	 * @param clcd
+	 *            - ChmLzxcControlData
+	 * @param chmBlockInfo
+	 *            - ChmBlockInfo
+	 * 
+	 * @return ChmBlockInfo
+	 */
+	protected ChmBlockInfo getChmBlockInfo(DirectoryListingEntry dle,
+			int bytesPerBlock, ChmLzxcControlData clcd,
+			ChmBlockInfo chmBlockInfo) {
+		if (!validateParameters(dle, bytesPerBlock, clcd, chmBlockInfo))
+			throw new ChmParsingException("Please check you parameters");
+
+		chmBlockInfo.setStartBlock(dle.getOffset() / bytesPerBlock);
+		chmBlockInfo.setEndBlock((dle.getOffset() + dle.getLength())
+				/ bytesPerBlock);
+		chmBlockInfo.setStartOffset(dle.getOffset() % bytesPerBlock);
+		chmBlockInfo.setEndOffset((dle.getOffset() + dle.getLength())
+				% bytesPerBlock);
+		// potential problem with casting long to int
+		chmBlockInfo
+				.setIniBlock((chmBlockInfo.startBlock - chmBlockInfo.startBlock)
+						% (int) clcd.getResetInterval());
+		return chmBlockInfo;
+	}
+
+	public static ChmBlockInfo getChmBlockInfoInstance(
+			DirectoryListingEntry dle, int bytesPerBlock,
+			ChmLzxcControlData clcd) {
+		setChmBlockInfo(new ChmBlockInfo());
+		getChmBlockInfo().setStartBlock(dle.getOffset() / bytesPerBlock);
+		getChmBlockInfo().setEndBlock(
+				(dle.getOffset() + dle.getLength()) / bytesPerBlock);
+		getChmBlockInfo().setStartOffset(dle.getOffset() % bytesPerBlock);
+		getChmBlockInfo().setEndOffset(
+				(dle.getOffset() + dle.getLength()) % bytesPerBlock);
+		// potential problem with casting long to int
+		getChmBlockInfo().setIniBlock(
+				(getChmBlockInfo().startBlock - getChmBlockInfo().startBlock)
+						% (int) clcd.getResetInterval());
+		return getChmBlockInfo();
+	}
+
+	/**
+	 * Returns textual representation of ChmBlockInfo
+	 */
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append("iniBlock:=" + getIniBlock() + ", ");
+		sb.append("startBlock:=" + getStartBlock() + ", ");
+		sb.append("endBlock:=" + getEndBlock() + ", ");
+		sb.append("startOffset:=" + getStartOffset() + ", ");
+		sb.append("endOffset:=" + getEndOffset()
+				+ System.getProperty("line.separator"));
+		return sb.toString();
+	}
+
+	private boolean validateParameters(DirectoryListingEntry dle,
+			int bytesPerBlock, ChmLzxcControlData clcd,
+			ChmBlockInfo chmBlockInfo) {
+		int goodParameter = 0;
+		if (dle != null)
+			++goodParameter;
+		if (bytesPerBlock > 0)
+			++goodParameter;
+		if (clcd != null)
+			++goodParameter;
+		if (chmBlockInfo != null)
+			++goodParameter;
+		return (goodParameter == 4);
+	}
+
+	public static void main(String[] args) {
+	}
+
+	/**
+	 * Returns an initial block index
+	 * 
+	 * @return int
+	 */
+	public int getIniBlock() {
+		return iniBlock;
+	}
+
+	/**
+	 * Sets the initial block index
+	 * 
+	 * @param iniBlock
+	 *            - int
+	 */
+	private void setIniBlock(int iniBlock) {
+		this.iniBlock = iniBlock;
+	}
+
+	/**
+	 * Returns the start block index
+	 * 
+	 * @return int
+	 */
+	public int getStartBlock() {
+		return startBlock;
+	}
+
+	/**
+	 * Sets the start block index
+	 * 
+	 * @param startBlock
+	 *            - int
+	 */
+	private void setStartBlock(int startBlock) {
+		this.startBlock = startBlock;
+	}
+
+	/**
+	 * Returns the end block index
+	 * 
+	 * @return - int
+	 */
+	public int getEndBlock() {
+		return endBlock;
+	}
+
+	/**
+	 * Sets the end block index
+	 * 
+	 * @param endBlock
+	 *            - int
+	 */
+	private void setEndBlock(int endBlock) {
+		this.endBlock = endBlock;
+	}
+
+	/**
+	 * Returns the start offset index
+	 * 
+	 * @return - int
+	 */
+	public int getStartOffset() {
+		return startOffset;
+	}
+
+	/**
+	 * Sets the start offset index
+	 * 
+	 * @param startOffset
+	 *            - int
+	 */
+	private void setStartOffset(int startOffset) {
+		this.startOffset = startOffset;
+	}
+
+	/**
+	 * Returns the end offset index
+	 * 
+	 * @return - int
+	 */
+	public int getEndOffset() {
+		return endOffset;
+	}
+
+	/**
+	 * Sets the end offset index
+	 * 
+	 * @param endOffset
+	 *            - int
+	 */
+	private void setEndOffset(int endOffset) {
+		this.endOffset = endOffset;
+	}
+
+	public static void setChmBlockInfo(ChmBlockInfo chmBlockInfo) {
+		ChmBlockInfo.chmBlockInfo = chmBlockInfo;
+	}
+
+	public static ChmBlockInfo getChmBlockInfo() {
+		return chmBlockInfo;
+	}
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxBlock.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxBlock.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxBlock.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxBlock.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,906 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.lzx;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmCommons.IntelState;
+import org.apache.tika.parser.chm.core.ChmCommons.LzxState;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+/**
+ * Decompresses a chm block. Depending on the chm block type, the most
+ * relevant decompression method is chosen. A chm block type can be one of the
+ * following:
+ * <ul>
+ * <li>UNDEFINED - no action taken, i.e. the block is skipped</li>
+ * <li>VERBATIM</li>
+ * <li>ALIGNED_OFFSET</li>
+ * <li>UNCOMPRESSED - the simplest one</li>
+ * </ul>
+ * In addition there are unknown types (4-7). Currently such a type is replaced
+ * by the type of the previous chm block. A more appropriate way to handle
+ * these types still needs to be found.
+ */
+public class ChmLzxBlock {
+	private int block_number;
+	private long block_length;
+	private ChmLzxState state;
+	private byte[] content = null;
+	private ChmSection chmSection = null;
+	private int contentLength = 0;
+
+	// trying to find solution for bad blocks ...
+	private int previousBlockType = -1;
+
+	/**
+	 * Creates a block and immediately tries to decompress it.
+	 * 
+	 * The constructor never throws a ChmParsingException: one raised during
+	 * set-up or during extraction is caught and dropped, so a failed block is
+	 * only recognisable by its (possibly partial) content. If the parameters do
+	 * not pass validateConstructorParams, a message is printed to System.err
+	 * and nothing is initialised.
+	 * 
+	 * @param blockNumber
+	 *            - int, number of this block
+	 * @param dataSegment
+	 *            - byte[], data this block is read from
+	 * @param blockLength
+	 *            - long, length used to size the content of this block
+	 * @param prevBlock
+	 *            - ChmLzxBlock, previous block or null
+	 */
+	public ChmLzxBlock(int blockNumber, byte[] dataSegment, long blockLength,
+			ChmLzxBlock prevBlock) {
+		try {
+			if (validateConstructorParams(blockNumber, dataSegment, blockLength)) {
+				setBlockNumber(blockNumber);
+
+				// the section wraps the content of the previous block when that
+				// block's state reports blockLength > blockRemaining,
+				// otherwise it wraps the supplied data segment
+				if (prevBlock != null
+						&& prevBlock.getState().getBlockLength() > prevBlock
+								.getState().getBlockRemaining())
+					setChmSection(new ChmSection(prevBlock.getContent()));
+				else
+					setChmSection(new ChmSection(dataSegment));
+
+				setBlockLength(blockLength);
+
+				// ============================================
+				// we need to take care of previous context
+				// ============================================
+				checkLzxBlock(prevBlock);
+				setContent((int) blockLength);
+				if (prevBlock == null
+						|| getContent().length < (int) getBlockLength()) {
+					setContent((int) getBlockLength());
+				}
+
+				// remembered so that extractContent() can substitute it for a
+				// block type greater than 3
+				if (prevBlock != null && prevBlock.getState() != null)
+					previousBlockType = prevBlock.getState().getBlockType();
+
+				try {
+					extractContent();
+				} catch (ChmParsingException e) {
+					// System.err.println(e.getMessage());
+				}
+			} else
+				System.err.println("Check your chm lzx block parameters");
+		} catch (ChmParsingException e) {
+			// TODO: handle exception
+		}
+	}
+
+	/**
+	 * Returns the value of the contentLength field. The decompress methods set
+	 * it to the position they have filled the content up to.
+	 * 
+	 * @return int
+	 */
+	protected int getContentLength() {
+		return contentLength;
+	}
+
+	/**
+	 * Sets the contentLength field.
+	 * 
+	 * @param contentLength
+	 *            - int
+	 */
+	protected void setContentLength(int contentLength) {
+		this.contentLength = contentLength;
+	}
+
+	/**
+	 * Returns the chm section this block reads its bits and bytes from.
+	 * 
+	 * @return ChmSection
+	 */
+	private ChmSection getChmSection() {
+		return chmSection;
+	}
+
+	/**
+	 * Sets the chm section this block reads its bits and bytes from.
+	 * 
+	 * @param chmSection
+	 *            - ChmSection
+	 */
+	private void setChmSection(ChmSection chmSection) {
+		this.chmSection = chmSection;
+	}
+
+	/**
+	 * Throws a ChmParsingException if the lzx state of this block is null.
+	 */
+	private void assertStateNotNull() {
+		if (getState() == null)
+			throw new ChmParsingException("state is null");
+	}
+
+	/**
+	 * Fills the content of this block by repeatedly decoding from the chm
+	 * section until contentLength reaches the block length.
+	 * 
+	 * Whenever the state reports that nothing remains of the current lzx block,
+	 * a new block header is read first: an optional intel file size (only
+	 * before decoding has started), a 3 bit block type and a 24 bit block
+	 * length, followed by the tables that belong to the block type. Nothing
+	 * is done if the data of the chm section is null.
+	 * 
+	 * @throws ChmParsingException
+	 *             if the state is null
+	 */
+	private void extractContent() {
+		assertStateNotNull();
+		if (getChmSection().getData() != null) {
+			while (getContentLength() < getBlockLength()) {// && tempStopLoop
+				if (getState() != null && getState().getBlockRemaining() == 0) {
+					if (getState().getHadStarted() == LzxState.NOT_STARTED_DECODING) {
+						getState().setHadStarted(LzxState.STARTED_DECODING);
+						// a set bit announces a 32 bit intel file size, read
+						// as two 16 bit halves, high half first
+						if (getChmSection().getSyncBits(1) == 1) {
+							int intelSizeTemp = (getChmSection()
+									.getSyncBits(16) << 16)
+									+ getChmSection().getSyncBits(16);
+							if (intelSizeTemp >= 0)
+								getState().setIntelFileSize(intelSizeTemp);
+							else
+								getState().setIntelFileSize(0);
+						}
+					}
+					// block header: 3 bit type, then the length as 16 + 8 bits
+					getState().setBlockType(getChmSection().getSyncBits(3));
+					getState().setBlockLength(
+							(getChmSection().getSyncBits(16) << 8)
+									+ getChmSection().getSyncBits(8));
+					getState().setBlockRemaining(getState().getBlockLength());
+
+					// ----------------------------------------
+					// Trying to handle 3 - 7 block types
+					// ----------------------------------------
+					// only types greater than 3 are replaced, and only when
+					// the previous block type is 0, 1 or 2
+					if (getState().getBlockType() > 3) {
+						if (previousBlockType >= 0 && previousBlockType < 3)
+							getState().setBlockType(previousBlockType);
+					}
+
+					switch (getState().getBlockType()) {
+					case ChmCommons.ALIGNED_OFFSET:
+						createAlignedTreeTable();
+						// no break: execution continues into the VERBATIM case,
+						// so the main and length tree tables are built as well
+					case ChmCommons.VERBATIM:
+						/* Creates mainTreeTable */
+						createMainTreeTable();
+						createLengthTreeTable();
+						if (getState().getMainTreeLengtsTable()[0xe8] != 0)
+							getState().setIntelState(IntelState.STARTED);
+						break;
+					case ChmCommons.UNCOMPRESSED:
+						getState().setIntelState(IntelState.STARTED);
+						if (getChmSection().getTotal() > 16)
+							getChmSection().setSwath(
+									getChmSection().getSwath() - 1);
+						// R0, R1 and R2 are each taken from the next 4 bytes
+						// of the section after reversing their byte order
+						getState().setR0(
+								(new BigInteger(getChmSection()
+										.reverseByteOrder(
+												getChmSection().unmarshalBytes(
+														4))).longValue()));
+						getState().setR1(
+								(new BigInteger(getChmSection()
+										.reverseByteOrder(
+												getChmSection().unmarshalBytes(
+														4))).longValue()));
+						getState().setR2(
+								(new BigInteger(getChmSection()
+										.reverseByteOrder(
+												getChmSection().unmarshalBytes(
+														4))).longValue()));
+						break;
+					default:
+						break;
+					}
+				}
+
+				// tempLen is the content position to decode up to; the state
+				// keeps whatever of the lzx block does not fit in this block
+				int tempLen;
+
+				if (getContentLength() + getState().getBlockRemaining() > getBlockLength()) {
+					getState().setBlockRemaining(
+							getContentLength() + getState().getBlockRemaining()
+									- (int) getBlockLength());
+					tempLen = (int) getBlockLength();
+				} else {
+					tempLen = getContentLength()
+							+ getState().getBlockRemaining();
+					getState().setBlockRemaining(0);
+				}
+
+				// NOTE(review): a block type without a case below decodes
+				// nothing, so contentLength does not advance in this pass
+				switch (getState().getBlockType()) {
+				case ChmCommons.ALIGNED_OFFSET:
+					// if(prevblock.lzxState.length>prevblock.lzxState.remaining)
+					decompressAlignedBlock(tempLen, getChmSection().getData());// prevcontext
+					break;
+				case ChmCommons.VERBATIM:
+					decompressVerbatimBlock(tempLen, getChmSection().getData());
+					break;
+				case ChmCommons.UNCOMPRESSED:
+					decompressUncompressedBlock(tempLen, getChmSection()
+							.getData());
+					break;
+				}
+				getState().increaseFramesRead();
+				if ((getState().getFramesRead() < 32768)
+						&& getState().getIntelFileSize() != 0)
+					intelE8Decoding();
+			}
+		}
+	}
+
+	/**
+	 * Post-processes the content after a frame was decoded when an intel file
+	 * size is set. In both branches the block length is subtracted from the
+	 * remaining size kept in the state. The second branch additionally scans
+	 * the content for the byte 0xe8 in order to rewrite the four bytes found
+	 * at that position.
+	 * 
+	 * NOTE(review): content is a byte[] and a byte is promoted to a value in
+	 * -128..127, so the test {@code content[i] != 0xe8} is always true. The
+	 * loop therefore only advances i, and the rewriting code below it is never
+	 * reached. Comparing against {@code (byte) 0xe8} would activate that code,
+	 * which has not been exercised. It reads the four bytes starting at the
+	 * 0xe8 byte itself, so it needs a review before being enabled.
+	 */
+	protected void intelE8Decoding() {
+		if (getBlockLength() <= ChmConstants.LZX_PRETREE_TABLEBITS
+				|| (getState().getIntelState() == IntelState.NOT_STARTED)) {
+			getState().setBlockRemaining(
+					getState().getBlockRemaining() - (int) getBlockLength());
+		} else {
+			long curpos = getState().getBlockRemaining();
+			getState().setBlockRemaining(
+					getState().getBlockRemaining() - (int) getBlockLength());
+			int i = 0;
+			while (i < getBlockLength() - 10) {
+				if (content[i] != 0xe8) {
+					i++;
+					continue;
+				}
+				// the four bytes at i..i+3 are reversed and read as a signed
+				// big-endian number
+				byte[] b = new byte[4];
+				b[0] = getContent()[i + 3];
+				b[1] = getContent()[i + 2];
+				b[2] = getContent()[i + 1];
+				b[3] = getContent()[i + 0];
+				long absoff = (new BigInteger(b)).longValue();
+				if ((absoff >= -curpos)
+						&& (absoff < getState().getIntelFileSize())) {
+					long reloff = (absoff >= 0) ? absoff - curpos : absoff
+							+ getState().getIntelFileSize();
+					getContent()[i + 0] = (byte) reloff;
+					getContent()[i + 1] = (byte) (reloff >>> 8);
+					getContent()[i + 2] = (byte) (reloff >>> 16);
+					getContent()[i + 3] = (byte) (reloff >>> 24);
+				}
+				i += 4;
+				curpos += 5;
+			}
+		}
+	}
+
+	private short[] createPreLenTable() {
+		short[] tmp = new short[ChmConstants.LZX_PRETREE_MAXSYMBOLS];
+		for (int i = 0; i < ChmConstants.LZX_PRETREE_MAXSYMBOLS; i++) {
+			tmp[i] = (short) getChmSection().getSyncBits(
+					ChmConstants.LZX_PRETREE_NUM_ELEMENTS_BITS);
+		}
+		return tmp;
+	}
+
+	/**
+	 * Builds the length tree table of the state. The pre-tree lengths are read
+	 * from the chm section and turned into a pre-tree table, which is then used
+	 * to read the length tree lengths. Finally the length tree table is created
+	 * from those lengths and stored in the state.
+	 * 
+	 * @throws ChmParsingException
+	 *             if the pre-tree length table or the pre-tree table is null
+	 */
+	private void createLengthTreeTable() {
+		short[] prelentable = createPreLenTable();
+
+		// the message used to name the wrong table ("pretreetable")
+		if (prelentable == null) {
+			throw new ChmParsingException("prelentable is null");
+		}
+
+		short[] pretreetable = createTreeTable2(prelentable,
+				(1 << ChmConstants.LZX_PRETREE_TABLEBITS)
+						+ (ChmConstants.LZX_PRETREE_MAXSYMBOLS << 1),
+				ChmConstants.LZX_PRETREE_TABLEBITS,
+				ChmConstants.LZX_PRETREE_MAXSYMBOLS);
+
+		if (pretreetable == null) {
+			throw new ChmParsingException("pretreetable is null");
+		}
+
+		createLengthTreeLenTable(0, ChmConstants.LZX_NUM_SECONDARY_LENGTHS,
+				pretreetable, prelentable);
+
+		getState().setLengthTreeTable(
+				createTreeTable2(getState().getLengthTreeLengtsTable(),
+						(1 << ChmConstants.LZX_MAINTREE_TABLEBITS)
+								+ (ChmConstants.LZX_LENGTH_MAXSYMBOLS << 1),
+						ChmConstants.LZX_MAINTREE_TABLEBITS,
+						ChmConstants.LZX_NUM_SECONDARY_LENGTHS));
+	}
+
+	public void decompressUncompressedBlock(int len, byte[] prevcontent) {
+		if (getContentLength() + getState().getBlockRemaining() <= getBlockLength()) {
+			for (int i = getContentLength(); i < (getContentLength() + getState()
+					.getBlockRemaining()); i++)
+				content[i] = getChmSection().getByte();
+
+			setContentLength(getContentLength()
+					+ getState().getBlockRemaining());
+			getState().setBlockRemaining(0);
+		} else {
+			for (int i = getContentLength(); i < getBlockLength(); i++)
+				content[i] = getChmSection().getByte();
+			getState().setBlockRemaining(
+					(int) getBlockLength() - getContentLength());// = blockLen -
+																	// contentlen;
+			setContentLength((int) getBlockLength());
+		}
+	}
+
+	/**
+	 * Decodes an ALIGNED_OFFSET block into the content, from the current
+	 * content length up to len, then sets the content length to len.
+	 * 
+	 * Each decoded symbol is either a literal byte (below LZX_NUM_CHARS) or a
+	 * match. For a match the length and offset are decoded and the bytes are
+	 * copied from earlier content or, when the source position is negative,
+	 * from the end of prevcontent. R0, R1 and R2 of the state hold the most
+	 * recent match offsets.
+	 * 
+	 * @param len
+	 *            - int, content position to decode up to
+	 * @param prevcontent
+	 *            - byte[], source for matches that start before this block
+	 * @throws ChmParsingException
+	 *             if the chm section, the state or the main tree table is null
+	 *             (the message only mentions the chm section)
+	 */
+	public void decompressAlignedBlock(int len, byte[] prevcontent) {
+
+		if ((getChmSection() == null) || (getState() == null)
+				|| (getState().getMainTreeTable() == null))
+			throw new ChmParsingException("chm section is null");
+
+		short s;
+		int x, i, border;
+		int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
+		int matchoffset = 0;
+		for (i = getContentLength(); i < len; i++) {
+			/* new code */
+			// stops decoding when the looked-up bits would index outside the
+			// main tree table
+			border = getChmSection().getDesyncBits(
+					ChmConstants.LZX_MAINTREE_TABLEBITS, 0);
+			if (border >= getState().mainTreeTable.length)
+				break;
+			/* end new code */
+			// NOTE(review): getDesyncBits is called a second time with the
+			// same arguments instead of reusing border - presumably it does
+			// not consume bits; confirm against ChmSection
+			s = getState().mainTreeTable[getChmSection().getDesyncBits(
+					ChmConstants.LZX_MAINTREE_TABLEBITS, 0)];
+			if (s >= getState().getMainTreeElements()) {
+				x = ChmConstants.LZX_MAINTREE_TABLEBITS;
+				do {
+					x++;
+					s <<= 1;
+					s += getChmSection().checkBit(x);
+				} while ((s = getState().mainTreeTable[s]) >= getState()
+						.getMainTreeElements());
+			}
+			// NOTE(review): the verbatim decoder passes
+			// getMainTreeLengtsTable()[s] here, this one passes
+			// mainTreeTable[s] - confirm which table is intended
+			getChmSection().getSyncBits(getState().mainTreeTable[s]);
+			if (s < ChmConstants.LZX_NUM_CHARS) {
+				content[i] = (byte) s;
+			} else {
+				s -= ChmConstants.LZX_NUM_CHARS;
+				matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
+				if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
+					matchfooter = getState().lengthTreeTable[getChmSection()
+							.getDesyncBits(ChmConstants.LZX_MAINTREE_TABLEBITS,
+									0)];
+					if (matchfooter >= ChmConstants.LZX_MAINTREE_TABLEBITS) {
+						x = ChmConstants.LZX_MAINTREE_TABLEBITS;
+						do {
+							x++;
+							matchfooter <<= 1;
+							matchfooter += getChmSection().checkBit(x);
+						} while ((matchfooter = getState().lengthTreeTable[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
+					}
+					getChmSection().getSyncBits(
+							getState().lengthTreeLengtsTable[matchfooter]);
+					matchlen += matchfooter;
+				}
+				matchlen += ChmConstants.LZX_MIN_MATCH;
+				matchoffset = s >>> 3;
+				if (matchoffset > 2) {
+					extra = ChmConstants.EXTRA_BITS[matchoffset];
+					matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2);
+					if (extra > 3) {
+						// more than 3 extra bits: the upper bits are read
+						// verbatim, the rest comes from the aligned tree
+						extra -= 3;
+						long l = getChmSection().getSyncBits(extra);
+						matchoffset += (l << 3);
+						int g = getChmSection().getDesyncBits(
+								ChmConstants.LZX_NUM_PRIMARY_LENGTHS, 0);
+						int t = getState().getAlignedTreeTable()[g];
+						if (t >= getState().getMainTreeElements()) {
+							x = ChmConstants.LZX_MAINTREE_TABLEBITS;
+							do {
+								x++;
+								t <<= 1;
+								t += getChmSection().checkBit(x);
+							} while ((t = getState().getAlignedTreeTable()[t]) >= getState()
+									.getMainTreeElements());
+						}
+						getChmSection().getSyncBits(
+								getState().getAlignedTreeTable()[t]);
+						matchoffset += t;
+					} else if (extra == 3) {
+						// exactly 3 extra bits: only the aligned tree is used
+						int g = (int) getChmSection().getDesyncBits(
+								ChmConstants.LZX_NUM_PRIMARY_LENGTHS, 0);
+						int t = getState().getAlignedTreeTable()[g];
+						if (t >= getState().getMainTreeElements()) {
+							x = ChmConstants.LZX_MAINTREE_TABLEBITS;
+							do {
+								x++;
+								t <<= 1;
+								t += getChmSection().checkBit(x);
+							} while ((t = getState().getAlignedTreeTable()[t]) >= getState()
+									.getMainTreeElements());
+						}
+						getChmSection().getSyncBits(
+								getState().getAlignedTreeTable()[t]);
+						matchoffset += t;
+					} else if (extra > 0) {
+						long l = getChmSection().getSyncBits(extra);
+						matchoffset += l;
+					} else
+						matchoffset = 1;
+					// a new offset pushes the previous ones down: R0->R1->R2
+					getState().setR2(getState().getR1());
+					getState().setR1(getState().getR0());
+					getState().setR0(matchoffset);
+				} else if (matchoffset == 0) {
+					matchoffset = (int) getState().getR0();
+				} else if (matchoffset == 1) {
+					matchoffset = (int) getState().getR1();
+					getState().setR1(getState().getR0());
+					getState().setR0(matchoffset);
+				} else /** match_offset == 2 */
+				{
+					matchoffset = (int) getState().getR2();
+					getState().setR2(getState().getR0());
+					getState().setR0(matchoffset);
+				}
+				rundest = i;
+				runsrc = rundest - matchoffset;
+				i += (matchlen - 1);
+				// a match that would run past len is dropped and decoding of
+				// this call ends
+				if (i > len)
+					break;
+
+				if (runsrc < 0) {
+					// the match starts before this block: read from the end of
+					// prevcontent, then continue at the start of the content
+					if (matchlen + runsrc <= 0) {
+						runsrc = prevcontent.length + runsrc;
+						while (matchlen-- > 0)
+							content[rundest++] = prevcontent[runsrc++];
+					} else {
+						runsrc = prevcontent.length + runsrc;
+						while (runsrc < prevcontent.length)
+							content[rundest++] = prevcontent[runsrc++];
+						matchlen = matchlen + runsrc - prevcontent.length;
+						runsrc = 0;
+						while (matchlen-- > 0)
+							content[rundest++] = content[runsrc++];
+					}
+
+				} else {
+					/* copies any wrappes around source data */
+					// runsrc is >= 0 in this branch, so this loop never runs
+					while ((runsrc < 0) && (matchlen-- > 0)) {
+						content[rundest++] = content[(int) (runsrc + getBlockLength())];
+						runsrc++;
+					}
+					/* copies match data - no worries about destination wraps */
+					while (matchlen-- > 0)
+						content[rundest++] = content[runsrc++];
+				}
+			}
+		}
+		setContentLength(len);
+	}
+
+	/**
+	 * Throws a ChmParsingException if the given array is null.
+	 * 
+	 * @param array
+	 *            - short[] to check
+	 */
+	private void assertShortArrayNotNull(short[] array) {
+		if (array == null)
+			throw new ChmParsingException("short[] is null");
+	}
+
+	/**
+	 * Decodes a VERBATIM block into the content, from the current content
+	 * length up to len, then sets the content length to len.
+	 * 
+	 * Each decoded symbol is either a literal byte (below LZX_NUM_CHARS) or a
+	 * match. For a match the length and offset are decoded and the bytes are
+	 * copied from earlier content or, when the source position is negative,
+	 * from the end of prevcontent. R0, R1 and R2 of the state hold the most
+	 * recent match offsets.
+	 * 
+	 * @param len
+	 *            - int, content position to decode up to
+	 * @param prevcontent
+	 *            - byte[], source for matches that start before this block
+	 * @throws ChmParsingException
+	 *             if the main tree table is null, or if a match that spans
+	 *             prevcontent and the content cannot be copied within the
+	 *             bounds of the content
+	 */
+	private void decompressVerbatimBlock(int len, byte[] prevcontent) {
+		short s;
+		int x, i;
+		int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
+		int matchoffset = 0;
+		for (i = getContentLength(); i < len; i++) {
+			int f = (int) getChmSection().getDesyncBits(
+					ChmConstants.LZX_MAINTREE_TABLEBITS, 0);
+			assertShortArrayNotNull(getState().getMainTreeTable());
+			s = getState().getMainTreeTable()[f];
+			if (s >= ChmConstants.LZX_MAIN_MAXSYMBOLS) {
+				x = ChmConstants.LZX_MAINTREE_TABLEBITS;
+				do {
+					x++;
+					s <<= 1;
+					s += getChmSection().checkBit(x);
+				} while ((s = getState().getMainTreeTable()[s]) >= ChmConstants.LZX_MAIN_MAXSYMBOLS);
+			}
+			getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
+			if (s < ChmConstants.LZX_NUM_CHARS) {
+				content[i] = (byte) s;
+			} else {
+				s -= ChmConstants.LZX_NUM_CHARS;
+				matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
+				if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
+					matchfooter = getState().getLengthTreeTable()[(int) getChmSection()
+							.getDesyncBits(ChmConstants.LZX_LENGTH_TABLEBITS, 0)];
+					if (matchfooter >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS) {
+						x = ChmConstants.LZX_LENGTH_TABLEBITS;
+						do {
+							x++;
+							matchfooter <<= 1;
+							matchfooter += getChmSection().checkBit(x);
+						} while ((matchfooter = getState().getLengthTreeTable()[matchfooter]) >= ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
+					}
+					getChmSection().getSyncBits(
+							getState().getLengthTreeLengtsTable()[matchfooter]);
+					matchlen += matchfooter;
+				}
+				matchlen += ChmConstants.LZX_MIN_MATCH;
+				// shorter than 2
+				matchoffset = s >>> 3;
+				if (matchoffset > 2) {
+					if (matchoffset != 3) { // should get other bits to retrieve
+											// offset
+						extra = ChmConstants.EXTRA_BITS[matchoffset];
+						long l = getChmSection().getSyncBits(extra);
+						matchoffset = (int) (ChmConstants.POSITION_BASE[matchoffset] - 2 + l);
+					} else {
+						matchoffset = 1;
+					}
+					// a new offset pushes the previous ones down: R0->R1->R2
+					getState().setR2(getState().getR1());
+					getState().setR1(getState().getR0());
+					getState().setR0(matchoffset);
+				} else if (matchoffset == 0) {
+					matchoffset = (int) getState().getR0();
+				} else if (matchoffset == 1) {
+					matchoffset = (int) getState().getR1();
+					getState().setR1(getState().getR0());
+					getState().setR0(matchoffset);
+				} else /* match_offset == 2 */
+				{
+					matchoffset = (int) getState().getR2();
+					getState().setR2(getState().getR0());
+					getState().setR0(matchoffset);
+				}
+				rundest = i;
+				runsrc = rundest - matchoffset;
+				i += (matchlen - 1);
+				// a match that would run past len is dropped and decoding of
+				// this call ends
+				if (i > len)
+					break;
+				if (runsrc < 0) {
+					if (matchlen + runsrc <= 0) {
+						// the whole match lies in prevcontent; positions that
+						// fail the bounds test are skipped
+						runsrc = prevcontent.length + runsrc;
+						while ((matchlen-- > 0) && (prevcontent != null)
+								&& ((runsrc + 1) > 0))
+							if ((rundest < content.length)
+									&& (runsrc < content.length))
+								content[rundest++] = prevcontent[runsrc++];
+					} else {
+						// the match starts in prevcontent and continues at the
+						// start of the content
+						runsrc = prevcontent.length + runsrc;
+						while (runsrc < prevcontent.length) {
+							// this loop used to spin forever when the bounds
+							// test failed, because nothing advanced; such data
+							// is now rejected
+							if ((rundest >= content.length)
+									|| (runsrc >= content.length))
+								throw new ChmParsingException(
+										"match copy exceeds the bounds of the lzx block content");
+							content[rundest++] = prevcontent[runsrc++];
+						}
+						matchlen = matchlen + runsrc - prevcontent.length;
+						runsrc = 0;
+						while (matchlen-- > 0)
+							content[rundest++] = content[runsrc++];
+					}
+
+				} else {
+					/* copies any wrapped source data */
+					// runsrc is >= 0 in this branch, so this loop never runs
+					while ((runsrc < 0) && (matchlen-- > 0)) {
+						content[rundest++] = content[(int) (runsrc + getBlockLength())];
+						runsrc++;
+					}
+					/* copies match data - no worries about destination wraps */
+					while (matchlen-- > 0) {
+						if ((rundest < content.length)
+								&& (runsrc < content.length))
+							content[rundest++] = content[runsrc++];
+					}
+				}
+			}
+		}
+		setContentLength(len);
+	}
+
+	/**
+	 * Decodes the pretree-compressed code lengths for entries
+	 * {@code [offset, tablelen)} of the length-tree lengths table held by the
+	 * current state.
+	 * <p>
+	 * Every decoded pretree symbol is interpreted as follows:
+	 * <ul>
+	 * <li>below 17: the symbol is subtracted from the entry's previous value
+	 * (wrapping by 17) and stored in one entry;</li>
+	 * <li>17: a run of zeros, 4 plus the value of the next 4 bits long;</li>
+	 * <li>18: a run of zeros, 20 plus the value of the next 5 bits long;</li>
+	 * <li>19: a run, 4 plus the value of the next bit long, of one further
+	 * decoded delta.</li>
+	 * </ul>
+	 * Runs that would go past the end of the table are truncated.
+	 *
+	 * @param offset
+	 *            index of the first table entry to fill
+	 * @param tablelen
+	 *            index one past the last table entry to fill
+	 * @param pretreetable
+	 *            decoding table of the pretree
+	 * @param prelentable
+	 *            code length of every pretree symbol
+	 * @throws ChmParsingException
+	 *             if the chm section or one of the tables is null
+	 */
+	private void createLengthTreeLenTable(int offset, int tablelen,
+			short[] pretreetable, short[] prelentable) {
+		if (getChmSection() == null || pretreetable == null
+				|| prelentable == null)
+			throw new ChmParsingException("is null");
+
+		short[] lengths = getState().getLengthTreeLengtsTable();
+		int i = offset;
+		while (i < tablelen) {
+			int z = readLengthPreTreeSymbol(pretreetable, prelentable);
+			if (z < 17) {
+				z = lengths[i] - z;
+				if (z < 0)
+					z = z + 17;
+				lengths[i] = (short) z;
+				i++;
+			} else if (z == 17) {
+				int run = getChmSection().getSyncBits(4) + 4;
+				for (int j = 0; j < run; j++)
+					if (i < lengths.length)
+						lengths[i++] = 0;
+			} else if (z == 18) {
+				int run = getChmSection().getSyncBits(5) + 20;
+				for (int j = 0; j < run; j++)
+					if (i < lengths.length)
+						lengths[i++] = 0;
+			} else if (z == 19) {
+				/* the run length is read before the symbol that is repeated */
+				int run = getChmSection().getSyncBits(1) + 4;
+				z = readLengthPreTreeSymbol(pretreetable, prelentable);
+				z = lengths[i] - z;
+				if (z < 0)
+					z = z + 17;
+				/* guarded like the zero runs above so a run cannot overflow */
+				for (int j = 0; j < run; j++)
+					if (i < lengths.length)
+						lengths[i++] = (short) z;
+			}
+		}
+	}
+
+	/**
+	 * Decodes one pretree symbol from the chm section and consumes the bits of
+	 * its code. Codes longer than LZX_PRETREE_TABLEBITS are resolved by
+	 * walking the table one bit at a time until a value below
+	 * LZX_PRETREE_NUM_ELEMENTS is reached.
+	 */
+	private int readLengthPreTreeSymbol(short[] pretreetable,
+			short[] prelentable) {
+		int symbol = pretreetable[getChmSection().getDesyncBits(
+				ChmConstants.LZX_PRETREE_TABLEBITS, 0)];
+		if (symbol >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS) {
+			int bit = ChmConstants.LZX_PRETREE_TABLEBITS;
+			do {
+				bit++;
+				symbol <<= 1;
+				symbol += getChmSection().checkBit(bit);
+				symbol = pretreetable[symbol];
+			} while (symbol >= ChmConstants.LZX_PRETREE_NUM_ELEMENTS);
+		}
+		getChmSection().getSyncBits(prelentable[symbol]);
+		return symbol;
+	}
+
+	/**
+	 * Reads the main-tree code lengths in two passes, each with its own
+	 * freshly read pretree, and then stores the main-tree decoding table built
+	 * from those lengths in the current state.
+	 */
+	private void createMainTreeTable() {
+		final int preTreeBits = ChmConstants.LZX_PRETREE_TABLEBITS;
+		final int preTreeSymbols = ChmConstants.LZX_PRETREE_MAXSYMBOLS;
+		final int preTreeSize = (1 << preTreeBits) + (preTreeSymbols << 1);
+
+		/* first pass: entries [0, LZX_NUM_CHARS) */
+		short[] preLengths = createPreLenTable();
+		short[] preTree = createTreeTable2(preLengths, preTreeSize,
+				preTreeBits, preTreeSymbols);
+		createMainTreeLenTable(0, ChmConstants.LZX_NUM_CHARS, preTree,
+				preLengths);
+
+		/* second pass: entries [LZX_NUM_CHARS, end of the lengths table) */
+		preLengths = createPreLenTable();
+		preTree = createTreeTable2(preLengths, preTreeSize, preTreeBits,
+				preTreeSymbols);
+		createMainTreeLenTable(ChmConstants.LZX_NUM_CHARS,
+				getState().mainTreeLengtsTable.length, preTree, preLengths);
+
+		/* builds the decoding table out of the complete set of lengths */
+		final int mainTreeBits = ChmConstants.LZX_MAINTREE_TABLEBITS;
+		final int mainTreeSize = (1 << mainTreeBits)
+				+ (ChmConstants.LZX_MAINTREE_MAXSYMBOLS << 1);
+		short[] mainTree = createTreeTable2(getState().mainTreeLengtsTable,
+				mainTreeSize, mainTreeBits, getState().getMainTreeElements());
+		getState().setMainTreeTable(mainTree);
+	}
+
+	/**
+	 * Decodes the pretree-compressed code lengths for entries
+	 * {@code [offset, tablelen)} of the main-tree lengths table held by the
+	 * current state.
+	 * <p>
+	 * Pretree symbols below 17 are subtracted from the entry's previous value
+	 * (wrapping by 17); 17 and 18 write runs of zeros (4 plus a 4-bit value,
+	 * and 20 plus a 5-bit value, respectively) and fail if the run leaves the
+	 * table; 19 writes a run (4 plus a 1-bit value) of one further decoded
+	 * delta, truncated at the end of the table.
+	 *
+	 * @param offset
+	 *            index of the first table entry to fill
+	 * @param tablelen
+	 *            index one past the last table entry to fill
+	 * @param pretreetable
+	 *            decoding table of the pretree
+	 * @param prelentable
+	 *            code length of every pretree symbol
+	 * @throws ChmParsingException
+	 *             if one of the tables is null or a zero run leaves the table
+	 */
+	private void createMainTreeLenTable(int offset, int tablelen,
+			short[] pretreetable, short[] prelentable) {
+		if (pretreetable == null)
+			throw new ChmParsingException("pretreetable is null");
+		/* checked up front instead of failing with a NullPointerException */
+		if (prelentable == null)
+			throw new ChmParsingException("prelentable is null");
+
+		short[] lengths = getState().getMainTreeLengtsTable();
+		int i = offset;
+		while (i < tablelen) {
+			int z = readMainPreTreeSymbol(pretreetable, prelentable);
+			if (z < 17) {
+				z = lengths[i] - z;
+				if (z < 0)
+					z = z + 17;
+				lengths[i] = (short) z;
+				i++;
+			} else if (z == 17) {
+				int run = getChmSection().getSyncBits(4) + 4;
+				for (int j = 0; j < run; j++) {
+					assertInRange(lengths, i);
+					lengths[i++] = 0;
+				}
+			} else if (z == 18) {
+				int run = getChmSection().getSyncBits(5) + 20;
+				for (int j = 0; j < run; j++) {
+					assertInRange(lengths, i);
+					lengths[i++] = 0;
+				}
+			} else if (z == 19) {
+				/* the run length is read before the symbol that is repeated */
+				int run = getChmSection().getSyncBits(1) + 4;
+				z = readMainPreTreeSymbol(pretreetable, prelentable);
+				z = lengths[i] - z;
+				if (z < 0)
+					z = z + 17;
+				for (int j = 0; j < run; j++)
+					if (i < lengths.length)
+						lengths[i++] = (short) z;
+			}
+		}
+	}
+
+	/**
+	 * Decodes one pretree symbol from the chm section and consumes the bits of
+	 * its code. Codes longer than LZX_PRETREE_TABLEBITS are resolved by
+	 * walking the table one bit at a time until a value below
+	 * LZX_PRETREE_MAXSYMBOLS is reached.
+	 */
+	private int readMainPreTreeSymbol(short[] pretreetable,
+			short[] prelentable) {
+		int symbol = pretreetable[getChmSection().getDesyncBits(
+				ChmConstants.LZX_PRETREE_TABLEBITS, 0)];
+		if (symbol >= ChmConstants.LZX_PRETREE_MAXSYMBOLS) {
+			int bit = ChmConstants.LZX_PRETREE_TABLEBITS;
+			do {
+				bit++;
+				symbol <<= 1;
+				symbol += getChmSection().checkBit(bit);
+				symbol = pretreetable[symbol];
+			} while (symbol >= ChmConstants.LZX_PRETREE_MAXSYMBOLS);
+		}
+		getChmSection().getSyncBits(prelentable[symbol]);
+		return symbol;
+	}
+
+	/**
+	 * Fails when {@code index} lies at or beyond the end of {@code array}.
+	 *
+	 * @param array
+	 *            array about to be indexed
+	 * @param index
+	 *            position about to be accessed
+	 * @throws ChmParsingException
+	 *             if {@code index >= array.length}
+	 */
+	private void assertInRange(short[] array, int index) {
+		final int size = array.length;
+		if (index < size) {
+			return;
+		}
+		throw new ChmParsingException(index + " is bigger than " + size);
+	}
+
+	/**
+	 * Reads the code lengths of the aligned-offset tree from the chm section.
+	 * <p>
+	 * One length is read for each of the LZX_ALIGNED_MAXSYMBOLS symbols, the
+	 * same symbol count createAlignedTreeTable() hands to createTreeTable2().
+	 * The table used to be sized with the block-type constant
+	 * LZX_BLOCKTYPE_UNCOMPRESSED, so it was shorter than the symbol count it
+	 * was decoded with.
+	 *
+	 * @return the code length of every aligned-offset tree symbol
+	 */
+	private short[] createAlignedLenTable() {
+		/*
+		 * assumes LZX_ALIGNED_MAXSYMBOLS is the number of aligned-offset tree
+		 * elements (8 in the LZX format) - TODO confirm against ChmConstants
+		 */
+		int tablelen = ChmConstants.LZX_ALIGNED_MAXSYMBOLS;
+		/*
+		 * NOTE(review): the width of one length field is still taken from
+		 * LZX_BLOCKTYPE_UNCOMPRESSED, presumably only because its value
+		 * matches the field width; a dedicated constant would be clearer
+		 */
+		int bits = ChmConstants.LZX_BLOCKTYPE_UNCOMPRESSED;
+		short[] tmp = new short[tablelen];
+		for (int i = 0; i < tablelen; i++) {
+			tmp[i] = (short) getChmSection().getSyncBits(bits);
+		}
+		return tmp;
+	}
+
+	/**
+	 * Reads the aligned-offset code lengths and builds the matching decoding
+	 * table, storing both in the current state.
+	 * <p>
+	 * The decoding table goes into the state's aligned tree table. It used to
+	 * be written with setAlignedLenTable(), which replaced the lengths that
+	 * had just been read and left the aligned tree table unset.
+	 */
+	private void createAlignedTreeTable() {
+		short[] alignedLengths = createAlignedLenTable();
+		getState().setAlignedLenTable(alignedLengths);
+
+		/* NOTE(review): LZX_NUM_PRIMARY_LENGTHS is used as the table width */
+		int tableBits = ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
+		int tableSize = (1 << tableBits)
+				+ (ChmConstants.LZX_ALIGNED_MAXSYMBOLS << 1);
+		getState().setAlignedTreeTable(
+				createTreeTable2(alignedLengths, tableSize, tableBits,
+						ChmConstants.LZX_ALIGNED_MAXSYMBOLS));
+	}
+
+	/**
+	 * Builds a decoding table from a table of code lengths.
+	 * <p>
+	 * Codes of up to {@code bits} bits get a direct mapping in the first
+	 * {@code 1 << bits} entries. Longer codes (up to 16 bits) are stored as
+	 * pairs of entries allocated behind the direct part and linked from it.
+	 *
+	 * @param lentable
+	 *            code length per symbol; symbols beyond its end are ignored
+	 * @param tablelen
+	 *            number of entries of the table to create
+	 * @param bits
+	 *            number of bits resolved by the direct mapping
+	 * @param maxsymbol
+	 *            number of symbols to consider
+	 * @return the decoding table, or null if the codes overflow the table
+	 */
+	private short[] createTreeTable2(short[] lentable, int tablelen, int bits,
+			int maxsymbol) {
+		short[] table = new short[tablelen];
+		short symbol;
+		int slot;
+		int position = 0; /* the current position in the decode table */
+		long tableMask = (1 << bits);
+		long bitMask = (tableMask >> 1);
+		long nextNode = bitMask;
+		int codeLength;
+
+		/* fills entries for short codes for a direct mapping */
+		for (codeLength = 1; codeLength <= bits; codeLength++) {
+			for (symbol = 0; symbol < maxsymbol; symbol++) {
+				if (lentable.length <= symbol
+						|| lentable[symbol] != codeLength) {
+					continue;
+				}
+				slot = position;
+				position += bitMask;
+				if (position > tableMask) {
+					return null; /* table overflow */
+				}
+				for (long left = bitMask; left > 0; left--) {
+					table[slot++] = symbol;
+				}
+			}
+			bitMask >>= 1;
+		}
+
+		/* if there are any codes longer than nbits */
+		if (position != tableMask) {
+			/* clears the remainder of the table */
+			for (slot = position; slot < tableMask; slot++) {
+				table[slot] = 0;
+			}
+
+			/* gives ourselves room for codes to grow by up to 16 more bits */
+			position <<= 16;
+			tableMask <<= 16;
+			bitMask = 1 << 15;
+
+			for (; codeLength <= 16; codeLength++) {
+				for (symbol = 0; symbol < maxsymbol; symbol++) {
+					if (lentable.length <= symbol
+							|| lentable[symbol] != codeLength) {
+						continue;
+					}
+					slot = position >> 16;
+					for (long depth = 0; depth < codeLength - bits; depth++) {
+						/*
+						 * if this path hasn't been taken yet, 'allocate' two
+						 * entries (only while they still fit in the table)
+						 */
+						if (table[slot] == 0
+								&& ((nextNode << 1) + 1) < table.length) {
+							table[(int) (nextNode << 1)] = 0;
+							table[(int) (nextNode << 1) + 1] = 0;
+							table[slot] = (short) nextNode;
+							nextNode++;
+						}
+						/*
+						 * follows the path and selects either left or right
+						 * for the next bit
+						 */
+						slot = table[slot] << 1;
+						if (((position >> (15 - depth)) & 1) != 0) {
+							slot++;
+						}
+					}
+					table[slot] = symbol;
+
+					position += bitMask;
+					if (position > tableMask) {
+						return null; /* table overflow */
+					}
+				}
+				bitMask >>= 1;
+			}
+		}
+
+		/* the table is returned whether or not it is completely filled */
+		return table;
+	}
+
+	/**
+	 * Returns the content buffer of this block.
+	 *
+	 * @return the internal content array itself (not a copy), possibly null
+	 */
+	public byte[] getContent() {
+		return this.content;
+	}
+
+	/**
+	 * Returns a copy of the content between two offsets.
+	 *
+	 * @param startOffset
+	 *            index of the first byte to copy
+	 * @param endOffset
+	 *            index one past the last byte to copy
+	 * @return the copied range, or a one-byte array if there is no content
+	 */
+	public byte[] getContent(int startOffset, int endOffset) {
+		byte[] all = getContent();
+		if (all == null) {
+			return new byte[1];
+		}
+		return Arrays.copyOfRange(all, startOffset, endOffset);
+	}
+
+	/**
+	 * Returns a copy of the content starting at {@code start}.
+	 * <p>
+	 * The result is as long as the whole content: the copy range ends at
+	 * {@code content.length + start}, and Arrays.copyOfRange fills the
+	 * positions past the end of the content with zeros.
+	 * NOTE(review): ending the range at {@code content.length} may have been
+	 * intended - confirm against the callers.
+	 *
+	 * @param start
+	 *            index of the first byte to copy
+	 * @return the copied range, or a one-byte array if there is no content
+	 */
+	public byte[] getContent(int start) {
+		byte[] all = getContent();
+		if (all == null) {
+			return new byte[1];
+		}
+		return Arrays.copyOfRange(all, start, all.length + start);
+	}
+
+	/**
+	 * Replaces the content buffer with a new zero-filled one.
+	 *
+	 * @param contentLength
+	 *            size of the new buffer in bytes
+	 */
+	private void setContent(int contentLength) {
+		byte[] fresh = new byte[contentLength];
+		this.content = fresh;
+	}
+
+	/**
+	 * Initialises the decoding state of this block: the state of the previous
+	 * block is reused when there is one, otherwise a new state is created from
+	 * the block length.
+	 *
+	 * @param chmPrevLzxBlock
+	 *            the previously decoded block, or null if there is none
+	 * @throws ChmParsingException
+	 *             if there is no previous block and the block length does not
+	 *             fit into an int (this case used to end in a
+	 *             NullPointerException)
+	 */
+	private void checkLzxBlock(ChmLzxBlock chmPrevLzxBlock) {
+		if (chmPrevLzxBlock != null) {
+			setState(chmPrevLzxBlock.getState());
+			return;
+		}
+		if (getBlockLength() >= Integer.MAX_VALUE)
+			throw new ChmParsingException("block length "
+					+ getBlockLength() + " is too big to create a new state");
+		setState(new ChmLzxState((int) getBlockLength()));
+	}
+
+	/**
+	 * Validates the constructor arguments in order and fails on the first
+	 * invalid one.
+	 *
+	 * @param blockNumber
+	 *            block number, must not be negative
+	 * @param dataSegment
+	 *            compressed data, must be neither null nor empty
+	 * @param blockLength
+	 *            block length, must be greater than zero
+	 * @return true; an invalid argument is reported by an exception instead
+	 * @throws ChmParsingException
+	 *             if one of the arguments is invalid
+	 */
+	private boolean validateConstructorParams(int blockNumber,
+			byte[] dataSegment, long blockLength) {
+		if (blockNumber < 0) {
+			throw new ChmParsingException("block number should be possitive");
+		}
+		if (dataSegment == null || dataSegment.length == 0) {
+			throw new ChmParsingException("data segment should not be null");
+		}
+		if (blockLength <= 0) {
+			throw new ChmParsingException(
+					"block length should be more than zero");
+		}
+		return true;
+	}
+
+	/**
+	 * @return the number of this block
+	 */
+	public int getBlockNumber() {
+		return this.block_number;
+	}
+
+	/**
+	 * @param block_number
+	 *            the number to store for this block
+	 */
+	private void setBlockNumber(int block_number) {
+		final int number = block_number;
+		this.block_number = number;
+	}
+
+	/**
+	 * @return the length stored for this block
+	 */
+	private long getBlockLength() {
+		return this.block_length;
+	}
+
+	/**
+	 * @param block_length
+	 *            the length to store for this block
+	 */
+	private void setBlockLength(long block_length) {
+		final long length = block_length;
+		this.block_length = length;
+	}
+
+	/**
+	 * @return the decoding state used by this block (the same object, not a
+	 *         copy)
+	 */
+	public ChmLzxState getState() {
+		return this.state;
+	}
+
+	/**
+	 * @param state
+	 *            the decoding state this block works with from now on
+	 */
+	private void setState(ChmLzxState state) {
+		final ChmLzxState next = state;
+		this.state = next;
+	}
+
+	/**
+	 * Empty entry point; it performs no work.
+	 *
+	 * @param args
+	 *            command line arguments, ignored
+	 */
+	public static void main(String[] args) {
+		/* intentionally left empty */
+	}
+}

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java?rev=1133047&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java Tue Jun  7 15:44:41 2011
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.lzx;
+
+import java.util.concurrent.CancellationException;
+
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.core.ChmCommons.IntelState;
+import org.apache.tika.parser.chm.core.ChmCommons.LzxState;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+/**
+ * Mutable state of the LZX decoder: window and block bookkeeping, the Intel
+ * transform fields, the R0-R2 offsets and the length / decoding tables of
+ * the main, length and aligned trees.
+ * <p>
+ * The class offers no synchronization of its own.
+ */
+public class ChmLzxState {
+	/* Class' members */
+	private int window; /* the actual decoding window */
+	private long window_size; /* window size (32Kb through 2Mb) */
+	private int window_position; /* current offset within the window */
+	private int main_tree_elements; /* number of main tree elements */
+	private LzxState hadStarted; /* have we started decoding at all yet? */
+	private int block_type; /* type of this block */
+	private int block_length; /* uncompressed length of this block */
+	private int block_remaining; /* uncompressed bytes still left to decode */
+	private int frames_read; /* the number of CFDATA blocks processed */
+	private int intel_file_size; /* magic header value used for transform */
+	private long intel_current_possition; /* current offset in transform space */
+	private IntelState intel_state; /* have we seen any translatable data yet? */
+	private long R0; /* for the LRU offset system */
+	private long R1; /* for the LRU offset system */
+	private long R2; /* for the LRU offset system */
+
+	// Trees - PRETREE, MAINTREE, LENGTH, ALIGNED
+	protected short[] mainTreeLengtsTable;
+	protected short[] mainTreeTable;
+
+	protected short[] lengthTreeTable;
+	protected short[] lengthTreeLengtsTable;
+
+	protected short[] alignedLenTable;
+	protected short[] alignedTreeTable;
+
+	/**
+	 * Creates a state with R0, R1 and R2 set to 1, 512 main tree elements,
+	 * empty length tables and all counters reset.
+	 * <p>
+	 * The window size is set to two to the power of the value that
+	 * ChmCommons.getWindowSize(window) returns. A value outside 15..21 is
+	 * only reported on System.err; construction continues.
+	 *
+	 * @param window
+	 *            value passed to ChmCommons.getWindowSize(int); must not be
+	 *            negative
+	 * @throws CancellationException
+	 *             if {@code window} is negative
+	 */
+	public ChmLzxState(int window) {
+		if (window < 0) {
+			throw new CancellationException(
+					"window size should be more than zero");
+		}
+		int win = ChmCommons.getWindowSize(window);
+		setWindowSize(1 << win);
+		/* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
+		if (win < 15 || win > 21) {
+			System.err
+					.println("window less than 15 or window greater than 21");
+		}
+		/*
+		 * A position slot count (42 for win 20, 50 for win 21, win * 2
+		 * otherwise) used to be computed here but was never read, so it has
+		 * been dropped; the number of main tree elements is fixed at 512.
+		 */
+		setR0(1);
+		setR1(1);
+		setR2(1);
+		setMainTreeElements(512);
+		setHadStarted(LzxState.NOT_STARTED_DECODING);
+		setFramesRead(0);
+		setBlockRemaining(0);
+		setBlockType(ChmConstants.LZX_BLOCKTYPE_INVALID);
+		setIntelCurrentPossition(0);
+		setIntelState(IntelState.NOT_STARTED);
+		setWindowPosition(0);
+		setMainTreeLengtsTable(new short[getMainTreeElements()]);
+		setLengthTreeLengtsTable(new short[ChmConstants.LZX_NUM_SECONDARY_LENGTHS]);
+	}
+
+	protected short[] getMainTreeTable() {
+		return mainTreeTable;
+	}
+
+	protected short[] getAlignedTreeTable() {
+		return alignedTreeTable;
+	}
+
+	protected void setAlignedTreeTable(short[] alignedTreeTable) {
+		this.alignedTreeTable = alignedTreeTable;
+	}
+
+	/**
+	 * @return the length tree decoding table
+	 * @throws ChmParsingException
+	 *             if the table has not been set
+	 */
+	protected short[] getLengthTreeTable() {
+		if (lengthTreeTable == null) {
+			throw new ChmParsingException("lengthTreeTable is null");
+		}
+		return this.lengthTreeTable;
+	}
+
+	protected void setLengthTreeTable(short[] lengthTreeTable) {
+		this.lengthTreeTable = lengthTreeTable;
+	}
+
+	protected void setMainTreeTable(short[] mainTreeTable) {
+		this.mainTreeTable = mainTreeTable;
+	}
+
+	protected short[] getAlignedLenTable() {
+		return this.alignedLenTable;
+	}
+
+	protected void setAlignedLenTable(short[] alignedLenTable) {
+		this.alignedLenTable = alignedLenTable;
+	}
+
+	/**
+	 * Returns a multi-line, human readable dump of the state: one
+	 * "label:=value" line per field, followed by the sizes of the two length
+	 * tables ("null" for a table that is not set).
+	 */
+	@Override
+	public String toString() {
+		final String eol = System.getProperty("line.separator");
+		StringBuilder sb = new StringBuilder();
+		sb.append("actual decoding window:=").append(getWindow()).append(eol);
+		sb.append("window size (32Kb through 2Mb):=").append(getWindowSize())
+				.append(eol);
+		sb.append("current offset within the window:=")
+				.append(getWindowPosition()).append(eol);
+		sb.append("number of main tree elements:=")
+				.append(getMainTreeElements()).append(eol);
+		sb.append("have we started decoding at all yet?:=")
+				.append(getHadStarted()).append(eol);
+		sb.append("type of this block:=").append(getBlockType()).append(eol);
+		sb.append("uncompressed length of this block:=")
+				.append(getBlockLength()).append(eol);
+		sb.append("uncompressed bytes still left to decode:=")
+				.append(getBlockRemaining()).append(eol);
+		sb.append("the number of CFDATA blocks processed:=")
+				.append(getFramesRead()).append(eol);
+		sb.append("magic header value used for transform:=")
+				.append(getIntelFileSize()).append(eol);
+		sb.append("current offset in transform space:=")
+				.append(getIntelCurrentPossition()).append(eol);
+		sb.append("have we seen any translatable data yet?:=")
+				.append(getIntelState()).append(eol);
+		sb.append("R0 for the LRU offset system:=").append(getR0())
+				.append(eol);
+		sb.append("R1 for the LRU offset system:=").append(getR1())
+				.append(eol);
+		sb.append("R2 for the LRU offset system:=").append(getR2())
+				.append(eol);
+		sb.append("main tree length:=")
+				.append(describeLength(getMainTreeLengtsTable())).append(eol);
+		sb.append("secondary tree length:=")
+				.append(describeLength(getLengthTreeLengtsTable()))
+				.append(eol);
+		return sb.toString();
+	}
+
+	/* length of a table as text, "null" when the table is not set */
+	private static String describeLength(short[] table) {
+		return (table == null) ? "null" : String.valueOf(table.length);
+	}
+
+	protected void setWindow(int window) {
+		this.window = window;
+	}
+
+	protected int getWindow() {
+		return window;
+	}
+
+	protected void setWindowSize(long window_size) {
+		this.window_size = window_size;
+	}
+
+	protected long getWindowSize() {
+		return window_size;
+	}
+
+	protected void setWindowPosition(int window_position) {
+		this.window_position = window_position;
+	}
+
+	protected int getWindowPosition() {
+		return window_position;
+	}
+
+	protected void setMainTreeElements(int main_tree_elements) {
+		this.main_tree_elements = main_tree_elements;
+	}
+
+	protected int getMainTreeElements() {
+		return main_tree_elements;
+	}
+
+	protected void setHadStarted(LzxState hadStarted) {
+		this.hadStarted = hadStarted;
+	}
+
+	protected LzxState getHadStarted() {
+		return hadStarted;
+	}
+
+	protected void setBlockType(int block_type) {
+		this.block_type = block_type;
+	}
+
+	public int getBlockType() {
+		return block_type;
+	}
+
+	protected void setBlockLength(int block_length) {
+		this.block_length = block_length;
+	}
+
+	protected int getBlockLength() {
+		return block_length;
+	}
+
+	protected void setBlockRemaining(int block_remaining) {
+		this.block_remaining = block_remaining;
+	}
+
+	protected int getBlockRemaining() {
+		return block_remaining;
+	}
+
+	protected void setFramesRead(int frames_read) {
+		this.frames_read = frames_read;
+	}
+
+	/** Adds one to the number of frames read. */
+	protected void increaseFramesRead() {
+		this.frames_read = getFramesRead() + 1;
+	}
+
+	protected int getFramesRead() {
+		return frames_read;
+	}
+
+	protected void setIntelFileSize(int intel_file_size) {
+		this.intel_file_size = intel_file_size;
+	}
+
+	protected int getIntelFileSize() {
+		return intel_file_size;
+	}
+
+	protected void setIntelCurrentPossition(long intel_current_possition) {
+		this.intel_current_possition = intel_current_possition;
+	}
+
+	protected long getIntelCurrentPossition() {
+		return intel_current_possition;
+	}
+
+	protected void setIntelState(IntelState intel_state) {
+		this.intel_state = intel_state;
+	}
+
+	protected IntelState getIntelState() {
+		return intel_state;
+	}
+
+	protected void setR0(long r0) {
+		R0 = r0;
+	}
+
+	protected long getR0() {
+		return R0;
+	}
+
+	protected void setR1(long r1) {
+		R1 = r1;
+	}
+
+	protected long getR1() {
+		return R1;
+	}
+
+	protected void setR2(long r2) {
+		R2 = r2;
+	}
+
+	protected long getR2() {
+		return R2;
+	}
+
+	/** Empty entry point; it performs no work. */
+	public static void main(String[] args) {
+	}
+
+	public void setMainTreeLengtsTable(short[] mainTreeLengtsTable) {
+		this.mainTreeLengtsTable = mainTreeLengtsTable;
+	}
+
+	public short[] getMainTreeLengtsTable() {
+		return mainTreeLengtsTable;
+	}
+
+	public void setLengthTreeLengtsTable(short[] lengthTreeLengtsTable) {
+		this.lengthTreeLengtsTable = lengthTreeLengtsTable;
+	}
+
+	public short[] getLengthTreeLengtsTable() {
+		return lengthTreeLengtsTable;
+	}
+}