You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:31 UTC

[25/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
index 119a47b..e423871 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
@@ -1,102 +1,102 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.core;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-public class ChmConstants {
-    /* Prevents instantiation */
-    private ChmConstants() {
-    }
-
-    public static final String DEFAULT_CHARSET = UTF_8.name();
-    public static final String ITSF = "ITSF";
-    public static final String ITSP = "ITSP";
-    public static final String PMGL = "PMGL";
-    public static final String LZXC = "LZXC";
-    public static final String CHM_PMGI_MARKER = "PMGI";
-    public static final int BYTE_ARRAY_LENGHT = 16;
-    public static final int CHM_ITSF_V2_LEN = 0x58;
-    public static final int CHM_ITSF_V3_LEN = 0x60;
-    public static final int CHM_ITSP_V1_LEN = 0x54;
-    public static final int CHM_PMGL_LEN = 0x14;
-    public static final int CHM_PMGI_LEN = 0x08;
-    public static final int CHM_LZXC_RESETTABLE_V1_LEN = 0x28;
-    public static final int CHM_LZXC_MIN_LEN = 0x18;
-    public static final int CHM_LZXC_V2_LEN = 0x1c;
-    public static final int CHM_SIGNATURE_LEN = 4;
-    public static final int CHM_VER_2 = 2;
-    public static final int CHM_VER_3 = 3;
-    public static final int CHM_VER_1 = 1;
-    public static final int CHM_WINDOW_SIZE_BLOCK = 0x8000;
-
-    /* my hacking */
-    public static final int START_PMGL = 0xCC;
-    public static final String CONTROL_DATA = "ControlData";
-    public static final String RESET_TABLE = "ResetTable";
-    public static final String CONTENT = "Content";
-
-    /* some constants defined by the LZX specification */
-    public static final int LZX_MIN_MATCH = 2;
-    public static final int LZX_MAX_MATCH = 257;
-    public static final int LZX_NUM_CHARS = 256;
-    public static final int LZX_BLOCKTYPE_INVALID = 0; /*
-                                                        * also blocktypes 4-7
-                                                        * invalid
-                                                        */
-    public static final int LZX_BLOCKTYPE_VERBATIM = 1;
-    public static final int LZX_BLOCKTYPE_ALIGNED = 2;
-    public static final int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
-    public static final int LZX_PRETREE_NUM_ELEMENTS_BITS = 4; /* ??? */
-    public static final int LZX_PRETREE_NUM_ELEMENTS = 20;
-    public static final int LZX_ALIGNED_NUM_ELEMENTS = 8; /*
-                                                           * aligned offset tree
-                                                           * #elements
-                                                           */
-    public static final int LZX_NUM_PRIMARY_LENGTHS = 7; /*
-                                                          * this one missing
-                                                          * from spec!
-                                                          */
-    public static final int LZX_NUM_SECONDARY_LENGTHS = 249; /*
-                                                              * length tree
-                                                              * #elements
-                                                              */
-
-    /* LZX huffman defines: tweak tablebits as desired */
-    public static final int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
-    public static final int LZX_PRETREE_TABLEBITS = 6;
-    public static final int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
-    public static final int LZX_MAIN_MAXSYMBOLS = LZX_NUM_CHARS * 2;
-    public static final int LZX_MAINTREE_TABLEBITS = 12;
-    public static final int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
-    public static final int LZX_LENGTH_TABLEBITS = 12;
-    public static final int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
-    public static final int LZX_ALIGNED_TABLEBITS = 7;
-    public static final int LZX_LENTABLE_SAFETY = 64;
-
-    public static short[] EXTRA_BITS = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
-            5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
-            15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-            17, 17 };
-
-    public static int[] POSITION_BASE = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32,
-            48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072,
-            4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304,
-            131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504,
-            1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 1835008,
-            1966080, 2097152 };
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.core;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+public class ChmConstants {
+    /* Prevents instantiation */
+    private ChmConstants() {
+    }
+
+    public static final String DEFAULT_CHARSET = UTF_8.name();
+    public static final String ITSF = "ITSF";
+    public static final String ITSP = "ITSP";
+    public static final String PMGL = "PMGL";
+    public static final String LZXC = "LZXC";
+    public static final String CHM_PMGI_MARKER = "PMGI";
+    public static final int BYTE_ARRAY_LENGHT = 16;
+    public static final int CHM_ITSF_V2_LEN = 0x58;
+    public static final int CHM_ITSF_V3_LEN = 0x60;
+    public static final int CHM_ITSP_V1_LEN = 0x54;
+    public static final int CHM_PMGL_LEN = 0x14;
+    public static final int CHM_PMGI_LEN = 0x08;
+    public static final int CHM_LZXC_RESETTABLE_V1_LEN = 0x28;
+    public static final int CHM_LZXC_MIN_LEN = 0x18;
+    public static final int CHM_LZXC_V2_LEN = 0x1c;
+    public static final int CHM_SIGNATURE_LEN = 4;
+    public static final int CHM_VER_2 = 2;
+    public static final int CHM_VER_3 = 3;
+    public static final int CHM_VER_1 = 1;
+    public static final int CHM_WINDOW_SIZE_BLOCK = 0x8000;
+
+    /* my hacking */
+    public static final int START_PMGL = 0xCC;
+    public static final String CONTROL_DATA = "ControlData";
+    public static final String RESET_TABLE = "ResetTable";
+    public static final String CONTENT = "Content";
+
+    /* some constants defined by the LZX specification */
+    public static final int LZX_MIN_MATCH = 2;
+    public static final int LZX_MAX_MATCH = 257;
+    public static final int LZX_NUM_CHARS = 256;
+    public static final int LZX_BLOCKTYPE_INVALID = 0; /*
+                                                        * also blocktypes 4-7
+                                                        * invalid
+                                                        */
+    public static final int LZX_BLOCKTYPE_VERBATIM = 1;
+    public static final int LZX_BLOCKTYPE_ALIGNED = 2;
+    public static final int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
+    public static final int LZX_PRETREE_NUM_ELEMENTS_BITS = 4; /* ??? */
+    public static final int LZX_PRETREE_NUM_ELEMENTS = 20;
+    public static final int LZX_ALIGNED_NUM_ELEMENTS = 8; /*
+                                                           * aligned offset tree
+                                                           * #elements
+                                                           */
+    public static final int LZX_NUM_PRIMARY_LENGTHS = 7; /*
+                                                          * this one missing
+                                                          * from spec!
+                                                          */
+    public static final int LZX_NUM_SECONDARY_LENGTHS = 249; /*
+                                                              * length tree
+                                                              * #elements
+                                                              */
+
+    /* LZX huffman defines: tweak tablebits as desired */
+    public static final int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
+    public static final int LZX_PRETREE_TABLEBITS = 6;
+    public static final int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
+    public static final int LZX_MAIN_MAXSYMBOLS = LZX_NUM_CHARS * 2;
+    public static final int LZX_MAINTREE_TABLEBITS = 12;
+    public static final int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
+    public static final int LZX_LENGTH_TABLEBITS = 12;
+    public static final int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
+    public static final int LZX_ALIGNED_TABLEBITS = 7;
+    public static final int LZX_LENTABLE_SAFETY = 64;
+
+    public static short[] EXTRA_BITS = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
+            5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
+            15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+            17, 17 };
+
+    public static int[] POSITION_BASE = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32,
+            48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072,
+            4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304,
+            131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504,
+            1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 1835008,
+            1966080, 2097152 };
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
index 85f4177..454c1c4 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
@@ -1,392 +1,392 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.core;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
-import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
-import org.apache.tika.parser.chm.accessor.ChmItspHeader;
-import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
-import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
-import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
-import org.apache.tika.parser.chm.assertion.ChmAssert;
-import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
-import org.apache.tika.parser.chm.lzx.ChmBlockInfo;
-import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-/**
- * Extracts text from chm file. Enumerates chm entries.
- */
-public class ChmExtractor {
-    private List<ChmLzxBlock> lzxBlocksCache = null;
-    private ChmDirectoryListingSet chmDirList = null;
-    private ChmItsfHeader chmItsfHeader = null;
-    private ChmItspHeader chmItspHeader = null;
-    private ChmLzxcResetTable chmLzxcResetTable = null;
-    private ChmLzxcControlData chmLzxcControlData = null;
-    private byte[] data = null;
-    private int indexOfContent;
-    private long lzxBlockOffset;
-    private long lzxBlockLength;
-
-    /**
-     * Returns lzxc control data.
-     * 
-     * @return ChmLzxcControlData
-     */
-    private ChmLzxcControlData getChmLzxcControlData() {
-        return chmLzxcControlData;
-    }
-
-    /**
-     * Sets lzxc control data
-     * 
-     * @param chmLzxcControlData
-     */
-    private void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
-        this.chmLzxcControlData = chmLzxcControlData;
-    }
-
-    private ChmItspHeader getChmItspHeader() {
-        return chmItspHeader;
-    }
-
-    private void setChmItspHeader(ChmItspHeader chmItspHeader) {
-        this.chmItspHeader = chmItspHeader;
-    }
-
-    /**
-     * Returns lzxc reset table
-     * 
-     * @return ChmLzxcResetTable
-     */
-    private ChmLzxcResetTable getChmLzxcResetTable() {
-        return chmLzxcResetTable;
-    }
-
-    /**
-     * Sets lzxc reset table
-     * 
-     * @param chmLzxcResetTable
-     */
-    private void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
-        this.chmLzxcResetTable = chmLzxcResetTable;
-    }
-
-    /**
-     * Returns lzxc hit_cache length
-     * 
-     * @return lzxBlockLength
-     */
-    private long getLzxBlockLength() {
-        return lzxBlockLength;
-    }
-
-    /**
-     * Sets lzxc hit_cache length
-     * 
-     * @param lzxBlockLength
-     */
-    private void setLzxBlockLength(long lzxBlockLength) {
-        this.lzxBlockLength = lzxBlockLength;
-    }
-
-    /**
-     * Returns lzxc hit_cache offset
-     * 
-     * @return lzxBlockOffset
-     */
-    private long getLzxBlockOffset() {
-        return lzxBlockOffset;
-    }
-
-    /**
-     * Sets lzxc hit_cache offset
-     */
-    private void setLzxBlockOffset(long lzxBlockOffset) {
-        this.lzxBlockOffset = lzxBlockOffset;
-    }
-
-    private int getIndexOfContent() {
-        return indexOfContent;
-    }
-
-    private void setIndexOfContent(int indexOfContent) {
-        this.indexOfContent = indexOfContent;
-    }
-
-    private byte[] getData() {
-        return data;
-    }
-
-    private void setData(byte[] data) {
-        this.data = data;
-    }
-
-    public ChmExtractor(InputStream is) throws TikaException, IOException {
-        ChmAssert.assertInputStreamNotNull(is);
-        try {
-            setData(IOUtils.toByteArray(is));
-
-            /* Creates and parses chm itsf header */
-            setChmItsfHeader(new ChmItsfHeader());
-            // getChmItsfHeader().parse(Arrays.copyOfRange(getData(), 0,
-            // ChmConstants.CHM_ITSF_V3_LEN - 1), getChmItsfHeader());
-            getChmItsfHeader().parse(ChmCommons.copyOfRange(getData(), 0,
-                            ChmConstants.CHM_ITSF_V3_LEN - 1), getChmItsfHeader());
-
-            /* Creates and parses chm itsp header */
-            setChmItspHeader(new ChmItspHeader());
-            // getChmItspHeader().parse(Arrays.copyOfRange( getData(), (int)
-            // getChmItsfHeader().getDirOffset(),
-            // (int) getChmItsfHeader().getDirOffset() +
-            // ChmConstants.CHM_ITSP_V1_LEN), getChmItspHeader());
-            getChmItspHeader().parse(
-                    ChmCommons.copyOfRange(getData(), (int) getChmItsfHeader()
-                            .getDirOffset(), (int) getChmItsfHeader().getDirOffset() + 
-                            ChmConstants.CHM_ITSP_V1_LEN), getChmItspHeader());
-
-            /* Creates instance of ChmDirListingContainer */
-            setChmDirList(new ChmDirectoryListingSet(getData(),
-                    getChmItsfHeader(), getChmItspHeader()));
-
-            int indexOfControlData = getChmDirList().getControlDataIndex();
-            int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
-                    ChmConstants.LZXC.getBytes(UTF_8));
-            byte[] dir_chunk = null;
-            if (indexOfResetData > 0)
-                dir_chunk = ChmCommons.copyOfRange( getData(), indexOfResetData, indexOfResetData  
-                        + getChmDirList().getDirectoryListingEntryList().get(indexOfControlData).getLength());
-            // dir_chunk = Arrays.copyOfRange(getData(), indexOfResetData,
-            // indexOfResetData
-            // +
-            // getChmDirList().getDirectoryListingEntryList().get(indexOfControlData).getLength());
-
-            /* Creates and parses chm control data */
-            setChmLzxcControlData(new ChmLzxcControlData());
-            getChmLzxcControlData().parse(dir_chunk, getChmLzxcControlData());
-
-            int indexOfResetTable = getChmDirList().getResetTableIndex();
-            setChmLzxcResetTable(new ChmLzxcResetTable());
-
-            int startIndex = (int) getChmDirList().getDataOffset()
-                    + getChmDirList().getDirectoryListingEntryList()
-                            .get(indexOfResetTable).getOffset();
-
-            // assert startIndex < data.length
-            ChmAssert.assertCopyingDataIndex(startIndex, getData().length);
-
-            // dir_chunk = Arrays.copyOfRange(getData(), startIndex, startIndex
-            // +
-            // getChmDirList().getDirectoryListingEntryList().get(indexOfResetTable).getLength());
-            dir_chunk = ChmCommons.copyOfRange(getData(), startIndex, startIndex
-                            + getChmDirList().getDirectoryListingEntryList().get(indexOfResetTable).getLength());
-
-            getChmLzxcResetTable().parse(dir_chunk, getChmLzxcResetTable());
-
-            setIndexOfContent(ChmCommons.indexOf(getChmDirList().getDirectoryListingEntryList(), 
-                    ChmConstants.CONTENT));
-            setLzxBlockOffset((getChmDirList().getDirectoryListingEntryList().get(getIndexOfContent()).getOffset() 
-                    + getChmItsfHeader().getDataOffset()));
-            setLzxBlockLength(getChmDirList().getDirectoryListingEntryList().get(getIndexOfContent()).getLength());
-
-            setLzxBlocksCache(new ArrayList<ChmLzxBlock>());
-
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-    }
-
-    /**
-     * Enumerates chm entities
-     * 
-     * @return list of chm entities
-     */
-    public List<String> enumerateChm() {
-        List<String> listOfEntries = new ArrayList<String>();
-        for (DirectoryListingEntry directoryListingEntry : getChmDirList().getDirectoryListingEntryList()) {
-            listOfEntries.add(directoryListingEntry.getName());
-        }
-        return listOfEntries;
-    }
-
-    /**
-     * Decompresses a chm entry
-     * 
-     * @param directoryListingEntry
-     * 
-     * @return decompressed data
-     * @throws TikaException 
-     */
-    public byte[] extractChmEntry(DirectoryListingEntry directoryListingEntry) throws TikaException {
-        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        ChmLzxBlock lzxBlock = null;
-        try {
-            /* UNCOMPRESSED type is easiest one */
-            if (directoryListingEntry.getEntryType() == EntryType.UNCOMPRESSED
-                    && directoryListingEntry.getLength() > 0
-                    && !ChmCommons.hasSkip(directoryListingEntry)) {
-                int dataOffset = (int) (getChmItsfHeader().getDataOffset() + directoryListingEntry
-                        .getOffset());
-                // dataSegment = Arrays.copyOfRange(getData(), dataOffset,
-                // dataOffset + directoryListingEntry.getLength());
-                buffer.write(ChmCommons.copyOfRange(
-                        getData(), dataOffset,
-                        dataOffset + directoryListingEntry.getLength()));
-            } else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED
-                    && !ChmCommons.hasSkip(directoryListingEntry)) {
-                /* Gets a chm hit_cache info */
-                ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance(
-                        directoryListingEntry, (int) getChmLzxcResetTable()
-                                .getBlockLen(), getChmLzxcControlData());
-
-                int i = 0, start = 0, hit_cache = 0;
-
-                if ((getLzxBlockLength() < Integer.MAX_VALUE)
-                        && (getLzxBlockOffset() < Integer.MAX_VALUE)) {
-                    // TODO: Improve the caching
-                    // caching ... = O(n^2) - depends on startBlock and endBlock
-                    start = -1;
-                    if (!getLzxBlocksCache().isEmpty()) {
-                        for (i = 0; i < getLzxBlocksCache().size(); i++) {
-                            //lzxBlock = getLzxBlocksCache().get(i);
-                            int bn = getLzxBlocksCache().get(i).getBlockNumber();
-                            for (int j = bb.getIniBlock(); j <= bb.getStartBlock(); j++) {
-                                if (bn == j) {
-                                    if (j > start) {
-                                        start = j;
-                                        hit_cache = i;
-                                    }
-                                }
-                            }
-                            if (start == bb.getStartBlock())
-                                break;
-                        }
-                    }
-
-//                    if (i == getLzxBlocksCache().size() && i == 0) {
-                    if (start<0) {
-                        start = bb.getIniBlock();
-
-                        byte[] dataSegment = ChmCommons.getChmBlockSegment(
-                                getData(),
-                                getChmLzxcResetTable(), start,
-                                (int) getLzxBlockOffset(),
-                                (int) getLzxBlockLength());
-
-                        lzxBlock = new ChmLzxBlock(start, dataSegment,
-                                getChmLzxcResetTable().getBlockLen(), null);
-
-                        getLzxBlocksCache().add(lzxBlock);
-                    } else {
-                        lzxBlock = getLzxBlocksCache().get(hit_cache);
-                    }
-
-                    for (i = start; i <= bb.getEndBlock();) {
-                        if (i == bb.getStartBlock() && i == bb.getEndBlock()) {
-                            buffer.write(lzxBlock.getContent(
-                                    bb.getStartOffset(), bb.getEndOffset()));
-                            break;
-                        }
-
-                        if (i == bb.getStartBlock()) {
-                            buffer.write(lzxBlock.getContent(
-                                    bb.getStartOffset()));
-                        }
-
-                        if (i > bb.getStartBlock() && i < bb.getEndBlock()) {
-                            buffer.write(lzxBlock.getContent());
-                        }
-
-                        if (i == bb.getEndBlock()) {
-                            buffer.write(lzxBlock.getContent(
-                                    0, bb.getEndOffset()));
-                            break;
-                        }
-
-                        i++;
-
-                        if (i % getChmLzxcControlData().getResetInterval() == 0) {
-                            lzxBlock = new ChmLzxBlock(i,
-                                    ChmCommons.getChmBlockSegment(getData(),
-                                            getChmLzxcResetTable(), i,
-                                            (int) getLzxBlockOffset(),
-                                            (int) getLzxBlockLength()),
-                                    getChmLzxcResetTable().getBlockLen(), null);
-                        } else {
-                            lzxBlock = new ChmLzxBlock(i,
-                                    ChmCommons.getChmBlockSegment(getData(),
-                                            getChmLzxcResetTable(), i,
-                                            (int) getLzxBlockOffset(),
-                                            (int) getLzxBlockLength()),
-                                    getChmLzxcResetTable().getBlockLen(),
-                                    lzxBlock);
-                        }
-
-                        getLzxBlocksCache().add(lzxBlock);
-                    }
-
-                    if (getLzxBlocksCache().size() > getChmLzxcResetTable()
-                            .getBlockCount()) {
-                        getLzxBlocksCache().clear();
-                    }
-                } //end of if
-                
-                if (buffer.size() != directoryListingEntry.getLength()) {
-                    throw new TikaException("CHM file extract error: extracted Length is wrong.");
-                }
-            } //end of if compressed
-        } catch (Exception e) {
-            throw new TikaException(e.getMessage());
-        }
-
-        return buffer.toByteArray();
-    }
-
-    private void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
-        this.lzxBlocksCache = lzxBlocksCache;
-    }
-
-    private List<ChmLzxBlock> getLzxBlocksCache() {
-        return lzxBlocksCache;
-    }
-
-    private void setChmDirList(ChmDirectoryListingSet chmDirList) {
-        this.chmDirList = chmDirList;
-    }
-
-    public ChmDirectoryListingSet getChmDirList() {
-        return chmDirList;
-    }
-
-    private void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
-        this.chmItsfHeader = chmItsfHeader;
-    }
-
-    private ChmItsfHeader getChmItsfHeader() {
-        return chmItsfHeader;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.core;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmCommons.EntryType;
+import org.apache.tika.parser.chm.lzx.ChmBlockInfo;
+import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * Extracts text from chm file. Enumerates chm entries.
+ */
+public class ChmExtractor {
+    private List<ChmLzxBlock> lzxBlocksCache = null;
+    private ChmDirectoryListingSet chmDirList = null;
+    private ChmItsfHeader chmItsfHeader = null;
+    private ChmItspHeader chmItspHeader = null;
+    private ChmLzxcResetTable chmLzxcResetTable = null;
+    private ChmLzxcControlData chmLzxcControlData = null;
+    private byte[] data = null;
+    private int indexOfContent;
+    private long lzxBlockOffset;
+    private long lzxBlockLength;
+
+    /**
+     * Returns the LZXC control data held by this extractor.
+     *
+     * @return the current value of the {@code chmLzxcControlData} field
+     */
+    private ChmLzxcControlData getChmLzxcControlData() {
+        return chmLzxcControlData;
+    }
+
+    /**
+     * Sets the LZXC control data.
+     *
+     * @param chmLzxcControlData value to store in the field of the same name
+     */
+    private void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
+        this.chmLzxcControlData = chmLzxcControlData;
+    }
+
+    /**
+     * Returns the ITSP header held by this extractor.
+     *
+     * @return the current value of the {@code chmItspHeader} field
+     */
+    private ChmItspHeader getChmItspHeader() {
+        return chmItspHeader;
+    }
+
+    /**
+     * Sets the ITSP header.
+     *
+     * @param chmItspHeader value to store in the field of the same name
+     */
+    private void setChmItspHeader(ChmItspHeader chmItspHeader) {
+        this.chmItspHeader = chmItspHeader;
+    }
+
+    /**
+     * Returns the LZXC reset table held by this extractor.
+     *
+     * @return the current value of the {@code chmLzxcResetTable} field
+     */
+    private ChmLzxcResetTable getChmLzxcResetTable() {
+        return chmLzxcResetTable;
+    }
+
+    /**
+     * Sets the LZXC reset table.
+     *
+     * @param chmLzxcResetTable value to store in the field of the same name
+     */
+    private void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
+        this.chmLzxcResetTable = chmLzxcResetTable;
+    }
+
+    /**
+     * Returns the stored LZX block length.
+     *
+     * @return the current value of the {@code lzxBlockLength} field
+     */
+    private long getLzxBlockLength() {
+        return lzxBlockLength;
+    }
+
+    /**
+     * Sets the stored LZX block length.
+     *
+     * @param lzxBlockLength value to store in the field of the same name
+     */
+    private void setLzxBlockLength(long lzxBlockLength) {
+        this.lzxBlockLength = lzxBlockLength;
+    }
+
+    /**
+     * Returns the stored LZX block offset.
+     *
+     * @return the current value of the {@code lzxBlockOffset} field
+     */
+    private long getLzxBlockOffset() {
+        return lzxBlockOffset;
+    }
+
+    /**
+     * Sets the stored LZX block offset.
+     *
+     * @param lzxBlockOffset value to store in the field of the same name
+     */
+    private void setLzxBlockOffset(long lzxBlockOffset) {
+        this.lzxBlockOffset = lzxBlockOffset;
+    }
+
+    /**
+     * Returns the stored index of the content entry.
+     *
+     * @return the current value of the {@code indexOfContent} field
+     */
+    private int getIndexOfContent() {
+        return indexOfContent;
+    }
+
+    /**
+     * Sets the stored index of the content entry.
+     *
+     * @param indexOfContent value to store in the field of the same name
+     */
+    private void setIndexOfContent(int indexOfContent) {
+        this.indexOfContent = indexOfContent;
+    }
+
+    /**
+     * Returns the raw file bytes. The backing array itself is returned, not a
+     * copy.
+     *
+     * @return the current value of the {@code data} field
+     */
+    private byte[] getData() {
+        return data;
+    }
+
+    /**
+     * Sets the raw file bytes. The array reference is stored as given, without
+     * copying.
+     *
+     * @param data value to store in the field of the same name
+     */
+    private void setData(byte[] data) {
+        this.data = data;
+    }
+
+    /**
+     * Builds an extractor by reading the whole stream into memory and parsing
+     * the structures needed for later extraction: the ITSF header, the ITSP
+     * header, the directory listing, the LZXC control data and the LZXC reset
+     * table. It also records the offset and length of the entry located via
+     * {@code ChmConstants.CONTENT} and starts with an empty LZX block cache.
+     *
+     * @param is stream to read the chm file from; checked with
+     *            {@code ChmAssert.assertInputStreamNotNull}
+     * @throws TikaException propagated from the assertion and parsing helpers
+     *             called here
+     * @throws IOException if the stream cannot be read; the failure is
+     *             propagated to the caller instead of being printed and
+     *             ignored, so no extractor with unset fields is ever returned
+     */
+    public ChmExtractor(InputStream is) throws TikaException, IOException {
+        ChmAssert.assertInputStreamNotNull(is);
+        setData(IOUtils.toByteArray(is));
+
+        /* Creates and parses chm itsf header */
+        setChmItsfHeader(new ChmItsfHeader());
+        getChmItsfHeader().parse(ChmCommons.copyOfRange(getData(), 0,
+                        ChmConstants.CHM_ITSF_V3_LEN - 1), getChmItsfHeader());
+
+        /* Creates and parses chm itsp header; it starts at the directory offset */
+        setChmItspHeader(new ChmItspHeader());
+        getChmItspHeader().parse(
+                ChmCommons.copyOfRange(getData(), (int) getChmItsfHeader()
+                        .getDirOffset(), (int) getChmItsfHeader().getDirOffset() +
+                        ChmConstants.CHM_ITSP_V1_LEN), getChmItspHeader());
+
+        /* Creates instance of ChmDirListingContainer */
+        setChmDirList(new ChmDirectoryListingSet(getData(),
+                getChmItsfHeader(), getChmItspHeader()));
+
+        int indexOfControlData = getChmDirList().getControlDataIndex();
+        int indexOfResetData = ChmCommons.indexOfResetTableBlock(getData(),
+                ChmConstants.LZXC.getBytes(UTF_8));
+        // NOTE(review): dir_chunk stays null when indexOfResetData <= 0 and is
+        // then handed to parse() below; confirm that parse() rejects null.
+        byte[] dir_chunk = null;
+        if (indexOfResetData > 0) {
+            dir_chunk = ChmCommons.copyOfRange( getData(), indexOfResetData, indexOfResetData
+                    + getChmDirList().getDirectoryListingEntryList().get(indexOfControlData).getLength());
+        }
+
+        /* Creates and parses chm control data */
+        setChmLzxcControlData(new ChmLzxcControlData());
+        getChmLzxcControlData().parse(dir_chunk, getChmLzxcControlData());
+
+        int indexOfResetTable = getChmDirList().getResetTableIndex();
+        setChmLzxcResetTable(new ChmLzxcResetTable());
+
+        int startIndex = (int) getChmDirList().getDataOffset()
+                + getChmDirList().getDirectoryListingEntryList()
+                        .get(indexOfResetTable).getOffset();
+
+        // assert startIndex < data.length
+        ChmAssert.assertCopyingDataIndex(startIndex, getData().length);
+
+        dir_chunk = ChmCommons.copyOfRange(getData(), startIndex, startIndex
+                        + getChmDirList().getDirectoryListingEntryList().get(indexOfResetTable).getLength());
+
+        getChmLzxcResetTable().parse(dir_chunk, getChmLzxcResetTable());
+
+        /* Remembers where the content entry lives inside the file */
+        setIndexOfContent(ChmCommons.indexOf(getChmDirList().getDirectoryListingEntryList(),
+                ChmConstants.CONTENT));
+        setLzxBlockOffset((getChmDirList().getDirectoryListingEntryList().get(getIndexOfContent()).getOffset()
+                + getChmItsfHeader().getDataOffset()));
+        setLzxBlockLength(getChmDirList().getDirectoryListingEntryList().get(getIndexOfContent()).getLength());
+
+        setLzxBlocksCache(new ArrayList<ChmLzxBlock>());
+    }
+
+    /**
+     * Lists the name of every entry in the chm directory listing.
+     *
+     * @return entry names, in the order of the directory listing
+     */
+    public List<String> enumerateChm() {
+        List<DirectoryListingEntry> entries = getChmDirList().getDirectoryListingEntryList();
+        List<String> names = new ArrayList<String>();
+        for (int idx = 0; idx < entries.size(); idx++) {
+            names.add(entries.get(idx).getName());
+        }
+        return names;
+    }
+
+    /**
+     * Extracts the content of a single chm entry.
+     * <p>
+     * An {@code UNCOMPRESSED} entry with a positive length is copied straight
+     * out of the file data. A {@code COMPRESSED} entry is rebuilt from LZX
+     * blocks: decoding resumes from a cached block when one lies between the
+     * entry's initial block and start block, otherwise it starts at the
+     * initial block; every further block is decoded on top of its predecessor
+     * unless its number is a multiple of the reset interval. Entries for which
+     * {@code ChmCommons.hasSkip} is true, and entries of any other type, yield
+     * an empty array.
+     *
+     * @param directoryListingEntry entry to extract
+     * @return the entry's bytes; empty when nothing was extracted
+     * @throws TikaException if extraction fails or, for a compressed entry,
+     *             the number of bytes produced differs from the entry length;
+     *             when another exception is wrapped it is attached as the cause
+     */
+    public byte[] extractChmEntry(DirectoryListingEntry directoryListingEntry) throws TikaException {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+        ChmLzxBlock lzxBlock = null;
+        try {
+            /* UNCOMPRESSED type is easiest one */
+            if (directoryListingEntry.getEntryType() == EntryType.UNCOMPRESSED
+                    && directoryListingEntry.getLength() > 0
+                    && !ChmCommons.hasSkip(directoryListingEntry)) {
+                int dataOffset = (int) (getChmItsfHeader().getDataOffset() + directoryListingEntry
+                        .getOffset());
+                buffer.write(ChmCommons.copyOfRange(
+                        getData(), dataOffset,
+                        dataOffset + directoryListingEntry.getLength()));
+            } else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED
+                    && !ChmCommons.hasSkip(directoryListingEntry)) {
+                /* Works out which blocks and offsets the entry spans */
+                ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance(
+                        directoryListingEntry, (int) getChmLzxcResetTable()
+                                .getBlockLen(), getChmLzxcControlData());
+
+                int i = 0, start = 0, cacheIndex = 0;
+
+                // The offset and length are narrowed to int below, so larger
+                // values are not processed at all.
+                if ((getLzxBlockLength() < Integer.MAX_VALUE)
+                        && (getLzxBlockOffset() < Integer.MAX_VALUE)) {
+                    // TODO: Improve the caching
+                    // caching ... = O(n^2) - depends on startBlock and endBlock
+                    // start == -1 means "no usable block found in the cache".
+                    start = -1;
+                    if (!getLzxBlocksCache().isEmpty()) {
+                        for (i = 0; i < getLzxBlocksCache().size(); i++) {
+                            int bn = getLzxBlocksCache().get(i).getBlockNumber();
+                            // Keep the highest cached block number within
+                            // [iniBlock, startBlock].
+                            for (int j = bb.getIniBlock(); j <= bb.getStartBlock(); j++) {
+                                if (bn == j && j > start) {
+                                    start = j;
+                                    cacheIndex = i;
+                                }
+                            }
+                            if (start == bb.getStartBlock())
+                                break;
+                        }
+                    }
+
+                    if (start < 0) {
+                        // Cache miss: decode the initial block with no predecessor.
+                        start = bb.getIniBlock();
+
+                        byte[] dataSegment = ChmCommons.getChmBlockSegment(
+                                getData(),
+                                getChmLzxcResetTable(), start,
+                                (int) getLzxBlockOffset(),
+                                (int) getLzxBlockLength());
+
+                        lzxBlock = new ChmLzxBlock(start, dataSegment,
+                                getChmLzxcResetTable().getBlockLen(), null);
+
+                        getLzxBlocksCache().add(lzxBlock);
+                    } else {
+                        lzxBlock = getLzxBlocksCache().get(cacheIndex);
+                    }
+
+                    for (i = start; i <= bb.getEndBlock();) {
+                        // Entry lies entirely inside one block.
+                        if (i == bb.getStartBlock() && i == bb.getEndBlock()) {
+                            buffer.write(lzxBlock.getContent(
+                                    bb.getStartOffset(), bb.getEndOffset()));
+                            break;
+                        }
+
+                        // First block of the entry: from the start offset onwards.
+                        if (i == bb.getStartBlock()) {
+                            buffer.write(lzxBlock.getContent(
+                                    bb.getStartOffset()));
+                        }
+
+                        // Blocks strictly between first and last: whole content.
+                        if (i > bb.getStartBlock() && i < bb.getEndBlock()) {
+                            buffer.write(lzxBlock.getContent());
+                        }
+
+                        // Last block of the entry: up to the end offset.
+                        if (i == bb.getEndBlock()) {
+                            buffer.write(lzxBlock.getContent(
+                                    0, bb.getEndOffset()));
+                            break;
+                        }
+
+                        i++;
+
+                        // A block whose number is a multiple of the reset
+                        // interval is decoded without a predecessor; any other
+                        // block is decoded on top of the previous one.
+                        if (i % getChmLzxcControlData().getResetInterval() == 0) {
+                            lzxBlock = new ChmLzxBlock(i,
+                                    ChmCommons.getChmBlockSegment(getData(),
+                                            getChmLzxcResetTable(), i,
+                                            (int) getLzxBlockOffset(),
+                                            (int) getLzxBlockLength()),
+                                    getChmLzxcResetTable().getBlockLen(), null);
+                        } else {
+                            lzxBlock = new ChmLzxBlock(i,
+                                    ChmCommons.getChmBlockSegment(getData(),
+                                            getChmLzxcResetTable(), i,
+                                            (int) getLzxBlockOffset(),
+                                            (int) getLzxBlockLength()),
+                                    getChmLzxcResetTable().getBlockLen(),
+                                    lzxBlock);
+                        }
+
+                        getLzxBlocksCache().add(lzxBlock);
+                    }
+
+                    // Drop the whole cache once it holds more blocks than the
+                    // reset table reports.
+                    if (getLzxBlocksCache().size() > getChmLzxcResetTable()
+                            .getBlockCount()) {
+                        getLzxBlocksCache().clear();
+                    }
+                } //end of if
+
+                if (buffer.size() != directoryListingEntry.getLength()) {
+                    throw new TikaException("CHM file extract error: extracted Length is wrong.");
+                }
+            } //end of if compressed
+        } catch (TikaException e) {
+            // Already the declared type: rethrow untouched so its stack trace
+            // and concrete subclass survive.
+            throw e;
+        } catch (Exception e) {
+            // Keep the original failure as the cause instead of only its message.
+            throw new TikaException(e.getMessage(), e);
+        }
+
+        return buffer.toByteArray();
+    }
+
+    /**
+     * Replaces the list used to cache LZX blocks.
+     *
+     * @param lzxBlocksCache list to store in the field of the same name
+     */
+    private void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
+        this.lzxBlocksCache = lzxBlocksCache;
+    }
+
+    /**
+     * Returns the list used to cache LZX blocks. The list itself is returned,
+     * not a copy.
+     *
+     * @return the current value of the {@code lzxBlocksCache} field
+     */
+    private List<ChmLzxBlock> getLzxBlocksCache() {
+        return lzxBlocksCache;
+    }
+
+    /**
+     * Sets the directory listing.
+     *
+     * @param chmDirList value to store in the field of the same name
+     */
+    private void setChmDirList(ChmDirectoryListingSet chmDirList) {
+        this.chmDirList = chmDirList;
+    }
+
+    /**
+     * Returns the directory listing held by this extractor.
+     *
+     * @return the current value of the {@code chmDirList} field
+     */
+    public ChmDirectoryListingSet getChmDirList() {
+        return chmDirList;
+    }
+
+    /**
+     * Sets the ITSF header.
+     *
+     * @param chmItsfHeader value to store in the field of the same name
+     */
+    private void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
+        this.chmItsfHeader = chmItsfHeader;
+    }
+
+    /**
+     * Returns the ITSF header held by this extractor.
+     *
+     * @return the current value of the {@code chmItsfHeader} field
+     */
+    private ChmItsfHeader getChmItsfHeader() {
+        return chmItsfHeader;
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
index 03f81d3..9ed1898 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
@@ -1,147 +1,147 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.parser.chm.core;
-
-import java.util.List;
-
-import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
-import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
-import org.apache.tika.parser.chm.accessor.ChmItspHeader;
-import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
-import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
-import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
-
-public class ChmWrapper {
-    private List<ChmLzxBlock> lzxBlocksCache = null;
-    private ChmDirectoryListingSet chmDirList = null;
-    private ChmItsfHeader chmItsfHeader = null;
-    private ChmItspHeader chmItspHeader = null;
-    private ChmLzxcResetTable chmLzxcResetTable = null;
-    private ChmLzxcControlData chmLzxcControlData = null;
-    private byte[] data = null;
-    private int indexOfContent;
-    private long lzxBlockOffset;
-    private long lzxBlockLength;
-    private int indexOfResetData;
-    private int indexOfResetTable;
-    private int startIndex;
-
-    protected int getStartIndex() {
-        return startIndex;
-    }
-
-    protected void setStartIndex(int startIndex) {
-        this.startIndex = startIndex;
-    }
-
-    protected int getIndexOfResetTable() {
-        return indexOfResetTable;
-    }
-
-    protected void setIndexOfResetTable(int indexOfResetTable) {
-        this.indexOfResetTable = indexOfResetTable;
-    }
-
-    protected List<ChmLzxBlock> getLzxBlocksCache() {
-        return lzxBlocksCache;
-    }
-
-    protected void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
-        this.lzxBlocksCache = lzxBlocksCache;
-    }
-
-    protected ChmDirectoryListingSet getChmDirList() {
-        return chmDirList;
-    }
-
-    protected void setChmDirList(ChmDirectoryListingSet chmDirList) {
-        this.chmDirList = chmDirList;
-    }
-
-    protected ChmItsfHeader getChmItsfHeader() {
-        return chmItsfHeader;
-    }
-
-    protected void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
-        this.chmItsfHeader = chmItsfHeader;
-    }
-
-    protected ChmLzxcResetTable getChmLzxcResetTable() {
-        return chmLzxcResetTable;
-    }
-
-    protected void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
-        this.chmLzxcResetTable = chmLzxcResetTable;
-    }
-
-    protected ChmLzxcControlData getChmLzxcControlData() {
-        return chmLzxcControlData;
-    }
-
-    protected void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
-        this.chmLzxcControlData = chmLzxcControlData;
-    }
-
-    protected byte[] getData() {
-        return data;
-    }
-
-    protected void setData(byte[] data) {
-        this.data = data;
-    }
-
-    protected int getIndexOfContent() {
-        return indexOfContent;
-    }
-
-    protected void setIndexOfContent(int indexOfContent) {
-        this.indexOfContent = indexOfContent;
-    }
-
-    protected long getLzxBlockOffset() {
-        return lzxBlockOffset;
-    }
-
-    protected void setLzxBlockOffset(long lzxBlockOffset) {
-        this.lzxBlockOffset = lzxBlockOffset;
-    }
-
-    protected long getLzxBlockLength() {
-        return lzxBlockLength;
-    }
-
-    protected void setLzxBlockLength(long lzxBlockLength) {
-        this.lzxBlockLength = lzxBlockLength;
-    }
-
-    protected void setChmItspHeader(ChmItspHeader chmItspHeader) {
-        this.chmItspHeader = chmItspHeader;
-    }
-
-    protected ChmItspHeader getChmItspHeader() {
-        return chmItspHeader;
-    }
-
-    protected void setIndexOfResetData(int indexOfResetData) {
-        this.indexOfResetData = indexOfResetData;
-    }
-
-    protected int getIndexOfResetData() {
-        return indexOfResetData;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.chm.core;
+
+import java.util.List;
+
+import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
+import org.apache.tika.parser.chm.accessor.ChmItsfHeader;
+import org.apache.tika.parser.chm.accessor.ChmItspHeader;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable;
+import org.apache.tika.parser.chm.lzx.ChmLzxBlock;
+
+/**
+ * Mutable holder for state related to a chm file: the raw file bytes, the
+ * ITSF and ITSP headers, the directory listing, the LZXC control data and
+ * reset table, an LZX block cache and several offsets and indexes.
+ * <p>
+ * The class contains no logic. Every accessor is {@code protected} and simply
+ * reads or writes the field of the same name; arrays and lists are stored and
+ * returned by reference, without copying.
+ */
+public class ChmWrapper {
+    // Cache of LZX blocks.
+    private List<ChmLzxBlock> lzxBlocksCache = null;
+    // Directory listing of the chm file.
+    private ChmDirectoryListingSet chmDirList = null;
+    // ITSF header.
+    private ChmItsfHeader chmItsfHeader = null;
+    // ITSP header.
+    private ChmItspHeader chmItspHeader = null;
+    // LZXC reset table.
+    private ChmLzxcResetTable chmLzxcResetTable = null;
+    // LZXC control data.
+    private ChmLzxcControlData chmLzxcControlData = null;
+    // Raw bytes of the chm file.
+    private byte[] data = null;
+    private int indexOfContent;
+    private long lzxBlockOffset;
+    private long lzxBlockLength;
+    private int indexOfResetData;
+    private int indexOfResetTable;
+    private int startIndex;
+
+    protected int getStartIndex() {
+        return startIndex;
+    }
+
+    protected void setStartIndex(int startIndex) {
+        this.startIndex = startIndex;
+    }
+
+    protected int getIndexOfResetTable() {
+        return indexOfResetTable;
+    }
+
+    protected void setIndexOfResetTable(int indexOfResetTable) {
+        this.indexOfResetTable = indexOfResetTable;
+    }
+
+    protected List<ChmLzxBlock> getLzxBlocksCache() {
+        return lzxBlocksCache;
+    }
+
+    protected void setLzxBlocksCache(List<ChmLzxBlock> lzxBlocksCache) {
+        this.lzxBlocksCache = lzxBlocksCache;
+    }
+
+    protected ChmDirectoryListingSet getChmDirList() {
+        return chmDirList;
+    }
+
+    protected void setChmDirList(ChmDirectoryListingSet chmDirList) {
+        this.chmDirList = chmDirList;
+    }
+
+    protected ChmItsfHeader getChmItsfHeader() {
+        return chmItsfHeader;
+    }
+
+    protected void setChmItsfHeader(ChmItsfHeader chmItsfHeader) {
+        this.chmItsfHeader = chmItsfHeader;
+    }
+
+    protected ChmLzxcResetTable getChmLzxcResetTable() {
+        return chmLzxcResetTable;
+    }
+
+    protected void setChmLzxcResetTable(ChmLzxcResetTable chmLzxcResetTable) {
+        this.chmLzxcResetTable = chmLzxcResetTable;
+    }
+
+    protected ChmLzxcControlData getChmLzxcControlData() {
+        return chmLzxcControlData;
+    }
+
+    protected void setChmLzxcControlData(ChmLzxcControlData chmLzxcControlData) {
+        this.chmLzxcControlData = chmLzxcControlData;
+    }
+
+    protected byte[] getData() {
+        return data;
+    }
+
+    protected void setData(byte[] data) {
+        this.data = data;
+    }
+
+    protected int getIndexOfContent() {
+        return indexOfContent;
+    }
+
+    protected void setIndexOfContent(int indexOfContent) {
+        this.indexOfContent = indexOfContent;
+    }
+
+    protected long getLzxBlockOffset() {
+        return lzxBlockOffset;
+    }
+
+    protected void setLzxBlockOffset(long lzxBlockOffset) {
+        this.lzxBlockOffset = lzxBlockOffset;
+    }
+
+    protected long getLzxBlockLength() {
+        return lzxBlockLength;
+    }
+
+    protected void setLzxBlockLength(long lzxBlockLength) {
+        this.lzxBlockLength = lzxBlockLength;
+    }
+
+    protected void setChmItspHeader(ChmItspHeader chmItspHeader) {
+        this.chmItspHeader = chmItspHeader;
+    }
+
+    protected ChmItspHeader getChmItspHeader() {
+        return chmItspHeader;
+    }
+
+    protected void setIndexOfResetData(int indexOfResetData) {
+        this.indexOfResetData = indexOfResetData;
+    }
+
+    protected int getIndexOfResetData() {
+        return indexOfResetData;
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
index fbed908..46c522b 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
@@ -1,27 +1,27 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.exception;
-
-import org.apache.tika.exception.TikaException;
-
-public class ChmParsingException extends TikaException {
-    private static final long serialVersionUID = 6497936044733665210L;
-
-    public ChmParsingException(String description) {
-        super(description);
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.exception;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * {@link TikaException} subtype declared in the chm parser's exception
+ * package, for reporting problems found while handling chm data.
+ */
+public class ChmParsingException extends TikaException {
+    private static final long serialVersionUID = 6497936044733665210L;
+
+    /**
+     * Creates an exception carrying only a message.
+     *
+     * @param description message passed to the {@link TikaException} constructor
+     */
+    public ChmParsingException(String description) {
+        super(description);
+    }
+
+    /**
+     * Creates an exception that also records the failure that triggered it, so
+     * the original stack trace is not lost when a lower-level error is
+     * reported as a chm parsing problem.
+     *
+     * @param description message passed to the {@link TikaException} constructor
+     * @param cause the underlying failure
+     */
+    public ChmParsingException(String description, Throwable cause) {
+        super(description, cause);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
index 7f7564d..cda829c 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
@@ -1,235 +1,235 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.lzx;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
-import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
-import org.apache.tika.parser.chm.exception.ChmParsingException;
-
-/**
- * A container that contains chm block information such as: i. initial block is
- * using to reset main tree ii. start block is using for knowing where to start
- * iii. end block is using for knowing where to stop iv. start offset is using
- * for knowing where to start reading v. end offset is using for knowing where
- * to stop reading
- * 
- */
-public class ChmBlockInfo {
-    /* class members */
-    private int iniBlock;
-    private int startBlock;
-    private int endBlock;
-    private int startOffset;
-    private int endOffset;
-
-    private static ChmBlockInfo chmBlockInfo = null;
-
-    private ChmBlockInfo() {
-
-    }
-
-    /**
-     * Returns an information related to the chmBlockInfo
-     * 
-     * @param dle
-     *            - DirectoryListingEntry
-     * @param bytesPerBlock
-     *            - int, = chmLzxcResetTable.block_length
-     * @param clcd
-     *            - ChmLzxcControlData
-     * @param chmBlockInfo
-     *            - ChmBlockInfo
-     * 
-     * @return ChmBlockInfo
-     * @throws TikaException 
-     */
-    protected ChmBlockInfo getChmBlockInfo(DirectoryListingEntry dle,
-            int bytesPerBlock, ChmLzxcControlData clcd,
-            ChmBlockInfo chmBlockInfo) throws TikaException {
-        if (!validateParameters(dle, bytesPerBlock, clcd, chmBlockInfo))
-            throw new ChmParsingException("Please check you parameters");
-
-        chmBlockInfo.setStartBlock(dle.getOffset() / bytesPerBlock);
-        chmBlockInfo.setEndBlock((dle.getOffset() + dle.getLength())
-                / bytesPerBlock);
-        chmBlockInfo.setStartOffset(dle.getOffset() % bytesPerBlock);
-        chmBlockInfo.setEndOffset((dle.getOffset() + dle.getLength())
-                % bytesPerBlock);
-        // potential problem with casting long to int
-        chmBlockInfo
-                .setIniBlock(chmBlockInfo.startBlock - 
-                        chmBlockInfo.startBlock % (int) clcd.getResetInterval());
-//                .setIniBlock((chmBlockInfo.startBlock - chmBlockInfo.startBlock)
-//                        % (int) clcd.getResetInterval());
-        return chmBlockInfo;
-    }
-
-    public static ChmBlockInfo getChmBlockInfoInstance(
-            DirectoryListingEntry dle, int bytesPerBlock,
-            ChmLzxcControlData clcd) {
-        setChmBlockInfo(new ChmBlockInfo());
-        getChmBlockInfo().setStartBlock(dle.getOffset() / bytesPerBlock);
-        getChmBlockInfo().setEndBlock(
-                (dle.getOffset() + dle.getLength()) / bytesPerBlock);
-        getChmBlockInfo().setStartOffset(dle.getOffset() % bytesPerBlock);
-        getChmBlockInfo().setEndOffset(
-                (dle.getOffset() + dle.getLength()) % bytesPerBlock);
-        // potential problem with casting long to int
-        getChmBlockInfo().setIniBlock(
-                getChmBlockInfo().startBlock - getChmBlockInfo().startBlock
-                        % (int) clcd.getResetInterval());
-//                (getChmBlockInfo().startBlock - getChmBlockInfo().startBlock)
-//                        % (int) clcd.getResetInterval());
-        return getChmBlockInfo();
-    }
-
-    /**
-     * Returns textual representation of ChmBlockInfo
-     */
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        sb.append("iniBlock:=" + getIniBlock() + ", ");
-        sb.append("startBlock:=" + getStartBlock() + ", ");
-        sb.append("endBlock:=" + getEndBlock() + ", ");
-        sb.append("startOffset:=" + getStartOffset() + ", ");
-        sb.append("endOffset:=" + getEndOffset()
-                + System.getProperty("line.separator"));
-        return sb.toString();
-    }
-
-    private boolean validateParameters(DirectoryListingEntry dle,
-            int bytesPerBlock, ChmLzxcControlData clcd,
-            ChmBlockInfo chmBlockInfo) {
-        int goodParameter = 0;
-        if (dle != null)
-            ++goodParameter;
-        if (bytesPerBlock > 0)
-            ++goodParameter;
-        if (clcd != null)
-            ++goodParameter;
-        if (chmBlockInfo != null)
-            ++goodParameter;
-        return (goodParameter == 4);
-    }
-
-    public static void main(String[] args) {
-    }
-
-    /**
-     * Returns an initial block index
-     * 
-     * @return int
-     */
-    public int getIniBlock() {
-        return iniBlock;
-    }
-
-    /**
-     * Sets the initial block index
-     * 
-     * @param iniBlock
-     *            - int
-     */
-    private void setIniBlock(int iniBlock) {
-        this.iniBlock = iniBlock;
-    }
-
-    /**
-     * Returns the start block index
-     * 
-     * @return int
-     */
-    public int getStartBlock() {
-        return startBlock;
-    }
-
-    /**
-     * Sets the start block index
-     * 
-     * @param startBlock
-     *            - int
-     */
-    private void setStartBlock(int startBlock) {
-        this.startBlock = startBlock;
-    }
-
-    /**
-     * Returns the end block index
-     * 
-     * @return - int
-     */
-    public int getEndBlock() {
-        return endBlock;
-    }
-
-    /**
-     * Sets the end block index
-     * 
-     * @param endBlock
-     *            - int
-     */
-    private void setEndBlock(int endBlock) {
-        this.endBlock = endBlock;
-    }
-
-    /**
-     * Returns the start offset index
-     * 
-     * @return - int
-     */
-    public int getStartOffset() {
-        return startOffset;
-    }
-
-    /**
-     * Sets the start offset index
-     * 
-     * @param startOffset
-     *            - int
-     */
-    private void setStartOffset(int startOffset) {
-        this.startOffset = startOffset;
-    }
-
-    /**
-     * Returns the end offset index
-     * 
-     * @return - int
-     */
-    public int getEndOffset() {
-        return endOffset;
-    }
-
-    /**
-     * Sets the end offset index
-     * 
-     * @param endOffset
-     *            - int
-     */
-    private void setEndOffset(int endOffset) {
-        this.endOffset = endOffset;
-    }
-
-    public static void setChmBlockInfo(ChmBlockInfo chmBlockInfo) {
-        ChmBlockInfo.chmBlockInfo = chmBlockInfo;
-    }
-
-    public static ChmBlockInfo getChmBlockInfo() {
-        return chmBlockInfo;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.lzx;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.accessor.ChmLzxcControlData;
+import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+/**
+ * A container for the block coordinates of one directory listing entry,
+ * measured in blocks of {@code bytesPerBlock} bytes:
+ * <ul>
+ * <li>{@code iniBlock} - the start block rounded down to a multiple of the
+ * reset interval; used to reset the main tree</li>
+ * <li>{@code startBlock} - the block in which to start</li>
+ * <li>{@code endBlock} - the block in which to stop</li>
+ * <li>{@code startOffset} - the offset at which to start reading</li>
+ * <li>{@code endOffset} - the offset at which to stop reading</li>
+ * </ul>
+ * The instance most recently produced by
+ * {@link #getChmBlockInfoInstance(DirectoryListingEntry, int, ChmLzxcControlData)}
+ * is also kept in a static field. Access to that field is not synchronized.
+ */
+public class ChmBlockInfo {
+    /* class members */
+    private int iniBlock;
+    private int startBlock;
+    private int endBlock;
+    private int startOffset;
+    private int endOffset;
+
+    /* last instance published through setChmBlockInfo(...) */
+    private static ChmBlockInfo chmBlockInfo = null;
+
+    /* Instances are obtained through getChmBlockInfoInstance(...) */
+    private ChmBlockInfo() {
+
+    }
+
+    /**
+     * Fills the supplied {@code chmBlockInfo} with the block coordinates of
+     * the given entry and returns it.
+     * 
+     * @param dle
+     *            - DirectoryListingEntry, must not be null
+     * @param bytesPerBlock
+     *            - int, = chmLzxcResetTable.block_length, must be positive
+     * @param clcd
+     *            - ChmLzxcControlData, must not be null
+     * @param chmBlockInfo
+     *            - ChmBlockInfo to fill, must not be null
+     * 
+     * @return the same {@code chmBlockInfo} instance, populated
+     * @throws TikaException
+     *             (a ChmParsingException) if any argument is null or
+     *             {@code bytesPerBlock} is not positive
+     * @throws ArithmeticException
+     *             if the reset interval reported by {@code clcd} is zero
+     */
+    protected ChmBlockInfo getChmBlockInfo(DirectoryListingEntry dle,
+            int bytesPerBlock, ChmLzxcControlData clcd,
+            ChmBlockInfo chmBlockInfo) throws TikaException {
+        if (!validateParameters(dle, bytesPerBlock, clcd, chmBlockInfo)) {
+            throw new ChmParsingException("Please check you parameters");
+        }
+        populate(chmBlockInfo, dle, bytesPerBlock, clcd);
+        return chmBlockInfo;
+    }
+
+    /**
+     * Creates a new ChmBlockInfo for the given entry, stores it in the static
+     * field and returns it.
+     * <p>
+     * The new instance is built locally and stored only once it is fully
+     * populated, so a failure leaves the previously stored instance in place
+     * and the returned object is always the one that was just created.
+     * 
+     * @param dle
+     *            - DirectoryListingEntry
+     * @param bytesPerBlock
+     *            - int
+     * @param clcd
+     *            - ChmLzxcControlData
+     * 
+     * @return the newly created ChmBlockInfo
+     * @throws NullPointerException
+     *             if {@code dle} or {@code clcd} is null
+     * @throws ArithmeticException
+     *             if {@code bytesPerBlock} or the reset interval is zero
+     */
+    public static ChmBlockInfo getChmBlockInfoInstance(
+            DirectoryListingEntry dle, int bytesPerBlock,
+            ChmLzxcControlData clcd) {
+        ChmBlockInfo created = new ChmBlockInfo();
+        populate(created, dle, bytesPerBlock, clcd);
+        setChmBlockInfo(created);
+        return created;
+    }
+
+    /**
+     * Computes all five coordinates for {@code dle} and writes them into
+     * {@code target}. Nothing is written if the computation fails.
+     */
+    private static void populate(ChmBlockInfo target,
+            DirectoryListingEntry dle, int bytesPerBlock,
+            ChmLzxcControlData clcd) {
+        int offset = dle.getOffset();
+        // Widen before adding: offset + length may exceed Integer.MAX_VALUE.
+        long end = (long) offset + dle.getLength();
+        int firstBlock = offset / bytesPerBlock;
+
+        // Take the remainder at the interval's full width. Narrowing the
+        // interval to int first could change the divisor or make it zero.
+        long resetInterval = clcd.getResetInterval();
+        int blocksPastReset = (int) (firstBlock % resetInterval);
+
+        target.setStartBlock(firstBlock);
+        target.setEndBlock((int) (end / bytesPerBlock));
+        target.setStartOffset(offset % bytesPerBlock);
+        target.setEndOffset((int) (end % bytesPerBlock));
+        // Round the start block down to a multiple of the reset interval.
+        target.setIniBlock(firstBlock - blocksPastReset);
+    }
+
+    /**
+     * Returns textual representation of ChmBlockInfo, terminated by the
+     * platform line separator
+     */
+    @Override
+    public String toString() {
+        return new StringBuilder()
+                .append("iniBlock:=").append(getIniBlock()).append(", ")
+                .append("startBlock:=").append(getStartBlock()).append(", ")
+                .append("endBlock:=").append(getEndBlock()).append(", ")
+                .append("startOffset:=").append(getStartOffset()).append(", ")
+                .append("endOffset:=").append(getEndOffset())
+                .append(System.lineSeparator())
+                .toString();
+    }
+
+    /**
+     * Checks that every reference is non-null and that
+     * {@code bytesPerBlock} is positive.
+     * 
+     * @return true only if all four parameters are usable
+     */
+    private boolean validateParameters(DirectoryListingEntry dle,
+            int bytesPerBlock, ChmLzxcControlData clcd,
+            ChmBlockInfo chmBlockInfo) {
+        return dle != null && bytesPerBlock > 0 && clcd != null
+                && chmBlockInfo != null;
+    }
+
+    /**
+     * Does nothing; kept only so the public signature stays available.
+     * 
+     * @param args
+     *            - ignored
+     */
+    public static void main(String[] args) {
+    }
+
+    /**
+     * Returns an initial block index
+     * 
+     * @return int
+     */
+    public int getIniBlock() {
+        return iniBlock;
+    }
+
+    /**
+     * Sets the initial block index
+     * 
+     * @param iniBlock
+     *            - int
+     */
+    private void setIniBlock(int iniBlock) {
+        this.iniBlock = iniBlock;
+    }
+
+    /**
+     * Returns the start block index
+     * 
+     * @return int
+     */
+    public int getStartBlock() {
+        return startBlock;
+    }
+
+    /**
+     * Sets the start block index
+     * 
+     * @param startBlock
+     *            - int
+     */
+    private void setStartBlock(int startBlock) {
+        this.startBlock = startBlock;
+    }
+
+    /**
+     * Returns the end block index
+     * 
+     * @return - int
+     */
+    public int getEndBlock() {
+        return endBlock;
+    }
+
+    /**
+     * Sets the end block index
+     * 
+     * @param endBlock
+     *            - int
+     */
+    private void setEndBlock(int endBlock) {
+        this.endBlock = endBlock;
+    }
+
+    /**
+     * Returns the start offset index
+     * 
+     * @return - int
+     */
+    public int getStartOffset() {
+        return startOffset;
+    }
+
+    /**
+     * Sets the start offset index
+     * 
+     * @param startOffset
+     *            - int
+     */
+    private void setStartOffset(int startOffset) {
+        this.startOffset = startOffset;
+    }
+
+    /**
+     * Returns the end offset index
+     * 
+     * @return - int
+     */
+    public int getEndOffset() {
+        return endOffset;
+    }
+
+    /**
+     * Sets the end offset index
+     * 
+     * @param endOffset
+     *            - int
+     */
+    private void setEndOffset(int endOffset) {
+        this.endOffset = endOffset;
+    }
+
+    /**
+     * Replaces the instance held in the static field
+     * 
+     * @param chmBlockInfo
+     *            - ChmBlockInfo, may be null
+     */
+    public static void setChmBlockInfo(ChmBlockInfo chmBlockInfo) {
+        ChmBlockInfo.chmBlockInfo = chmBlockInfo;
+    }
+
+    /**
+     * Returns the instance held in the static field
+     * 
+     * @return ChmBlockInfo, or null if none has been stored
+     */
+    public static ChmBlockInfo getChmBlockInfo() {
+        return chmBlockInfo;
+    }
+}