You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:34 UTC

[28/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
index 1630edd..9d0a2f0 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
@@ -1,398 +1,398 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.accessor;
-
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.core.ChmCommons;
-import org.apache.tika.parser.chm.core.ChmConstants;
-import org.apache.tika.parser.chm.exception.ChmParsingException;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-/**
- * Holds chm listing entries
- */
-public class ChmDirectoryListingSet {
-    private List<DirectoryListingEntry> dlel;
-    private byte[] data;
-    private int placeHolder = -1;
-    private long dataOffset = -1;
-    private int controlDataIndex = -1;
-    private int resetTableIndex = -1;
-
-    private boolean isNotControlDataFound = true;
-    private boolean isNotResetTableFound = true;
-
-    /**
-     * Constructs chm directory listing set
-     * 
-     * @param data
-     *            byte[]
-     * @param chmItsHeader
-     * @param chmItspHeader
-     * @throws TikaException 
-     */
-    public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
-            ChmItspHeader chmItspHeader) throws TikaException {
-        setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
-        ChmCommons.assertByteArrayNotNull(data);
-        setData(data);
-        enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
-    }
-
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        sb.append("list:=" + getDirectoryListingEntryList().toString()
-                + System.getProperty("line.separator"));
-        sb.append("number of list items:="
-                + getDirectoryListingEntryList().size());
-        return sb.toString();
-    }
-
-    /**
-     * Returns control data index that located in List
-     * 
-     * @return control data index
-     */
-    public int getControlDataIndex() {
-        return controlDataIndex;
-    }
-
-    /**
-     * Sets control data index
-     * 
-     * @param controlDataIndex
-     */
-    protected void setControlDataIndex(int controlDataIndex) {
-        this.controlDataIndex = controlDataIndex;
-    }
-
-    /**
-     * Return index of reset table
-     * 
-     * @return reset table index
-     */
-    public int getResetTableIndex() {
-        return resetTableIndex;
-    }
-
-    /**
-     * Sets reset table index
-     * 
-     * @param resetTableIndex
-     */
-    protected void setResetTableIndex(int resetTableIndex) {
-        this.resetTableIndex = resetTableIndex;
-    }
-
-    /**
-     * Sets place holder
-     * 
-     * @param placeHolder
-     */
-    private void setPlaceHolder(int placeHolder) {
-        this.placeHolder = placeHolder;
-    }
-
-    private ChmPmglHeader PMGLheader;
-    /**
-     * Enumerates chm directory listing entries
-     * 
-     * @param chmItsHeader
-     *            chm itsf PMGLheader
-     * @param chmItspHeader
-     *            chm itsp PMGLheader
-     */
-    private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
-            ChmItspHeader chmItspHeader) {
-        try {
-            int startPmgl = chmItspHeader.getIndex_head();
-            int stopPmgl = chmItspHeader.getUnknown_0024();
-            int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
-                    .getHeader_len());
-            setDataOffset(chmItsHeader.getDataOffset());
-
-            /* loops over all pmgls */
-            byte[] dir_chunk = null;
-            for (int i = startPmgl; i>=0; ) {
-                dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
-                int start = i * (int) chmItspHeader.getBlock_len() + dir_offset;
-                dir_chunk = ChmCommons
-                        .copyOfRange(getData(), start,
-                                start +(int) chmItspHeader.getBlock_len());
-
-                PMGLheader = new ChmPmglHeader();
-                PMGLheader.parse(dir_chunk, PMGLheader);
-                enumerateOneSegment(dir_chunk);
-                
-                i=PMGLheader.getBlockNext();
-                dir_chunk = null;
-            }
-        } catch (Exception e) {
-            e.printStackTrace();
-        } finally {
-            setData(null);
-        }
-    }
-
-    /**
-     * Checks control data
-     * 
-     * @param dle
-     *            chm directory listing entry
-     */
-    private void checkControlData(DirectoryListingEntry dle) {
-        if (isNotControlDataFound) {
-            if (dle.getName().contains(ChmConstants.CONTROL_DATA)) {
-                setControlDataIndex(getDirectoryListingEntryList().size());
-                isNotControlDataFound = false;
-            }
-        }
-    }
-
-    /**
-     * Checks reset table
-     * 
-     * @param dle
-     *            chm directory listing entry
-     */
-    private void checkResetTable(DirectoryListingEntry dle) {
-        if (isNotResetTableFound) {
-            if (dle.getName().contains(ChmConstants.RESET_TABLE)) {
-                setResetTableIndex(getDirectoryListingEntryList().size());
-                isNotResetTableFound = false;
-            }
-        }
-    }
-
-    public static final boolean startsWith(byte[] data, String prefix) {
-        for (int i=0; i<prefix.length(); i++) {
-            if (data[i]!=prefix.charAt(i)) {
-                return false;
-            }
-        }
-        
-        return true;
-    }
-    /**
-     * Enumerates chm directory listing entries in single chm segment
-     * 
-     * @param dir_chunk
-     */
-    private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException {
-//        try {
-            if (dir_chunk != null) {
-                int header_len;
-                if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
-                    header_len = ChmConstants.CHM_PMGI_LEN;
-                    return; //skip PMGI
-                }
-                else if (startsWith(dir_chunk, ChmConstants.PMGL)) {
-                    header_len = ChmConstants.CHM_PMGL_LEN;
-                }
-                else {
-                    throw new ChmParsingException("Bad dir entry block.");
-                }
-
-                placeHolder = header_len;
-                //setPlaceHolder(header_len);
-                while (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()
-                        /*&& dir_chunk[placeHolder - 1] != 115*/) 
-                {
-                    //get entry name length
-                    int strlen = 0;// = getEncint(data);
-                    byte temp;
-                    while ((temp=dir_chunk[placeHolder++]) >= 0x80)
-                    {
-                        strlen <<= 7;
-                        strlen += temp & 0x7f;
-                    }
-
-                    strlen = (strlen << 7) + temp & 0x7f;
-                    
-                    if (strlen>dir_chunk.length) {
-                        throw new ChmParsingException("Bad data of a string length.");
-                    }
-                    
-                    DirectoryListingEntry dle = new DirectoryListingEntry();
-                    dle.setNameLength(strlen);
-                    dle.setName(new String(ChmCommons.copyOfRange(
-                                dir_chunk, placeHolder,
-                                (placeHolder + dle.getNameLength())), UTF_8));
-
-                    checkControlData(dle);
-                    checkResetTable(dle);
-                    setPlaceHolder(placeHolder
-                            + dle.getNameLength());
-
-                    /* Sets entry type */
-                    if (placeHolder < dir_chunk.length
-                            && dir_chunk[placeHolder] == 0)
-                        dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
-                    else
-                        dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
-
-                    setPlaceHolder(placeHolder + 1);
-                    dle.setOffset(getEncint(dir_chunk));
-                    dle.setLength(getEncint(dir_chunk));
-                    getDirectoryListingEntryList().add(dle);
-                }
-                
-//                int indexWorkData = ChmCommons.indexOf(dir_chunk,
-//                        "::".getBytes(UTF_8));
-//                int indexUserData = ChmCommons.indexOf(dir_chunk,
-//                        "/".getBytes(UTF_8));
-//
-//                if (indexUserData>=0 && indexUserData < indexWorkData)
-//                    setPlaceHolder(indexUserData);
-//                else if (indexWorkData>=0) {
-//                    setPlaceHolder(indexWorkData);
-//                }
-//                else {
-//                    setPlaceHolder(indexUserData);
-//                }
-//
-//                if (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()
-//                        && dir_chunk[placeHolder - 1] != 115) {// #{
-//                    do {
-//                        if (dir_chunk[placeHolder - 1] > 0) {
-//                            DirectoryListingEntry dle = new DirectoryListingEntry();
-//
-//                            // two cases: 1. when dir_chunk[placeHolder -
-//                            // 1] == 0x73
-//                            // 2. when dir_chunk[placeHolder + 1] == 0x2f
-//                            doNameCheck(dir_chunk, dle);
-//
-//                            // dle.setName(new
-//                            // String(Arrays.copyOfRange(dir_chunk,
-//                            // placeHolder, (placeHolder +
-//                            // dle.getNameLength()))));
-//                            dle.setName(new String(ChmCommons.copyOfRange(
-//                                    dir_chunk, placeHolder,
-//                                    (placeHolder + dle.getNameLength())), UTF_8));
-//                            checkControlData(dle);
-//                            checkResetTable(dle);
-//                            setPlaceHolder(placeHolder
-//                                    + dle.getNameLength());
-//
-//                            /* Sets entry type */
-//                            if (placeHolder < dir_chunk.length
-//                                    && dir_chunk[placeHolder] == 0)
-//                                dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
-//                            else
-//                                dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
-//
-//                            setPlaceHolder(placeHolder + 1);
-//                            dle.setOffset(getEncint(dir_chunk));
-//                            dle.setLength(getEncint(dir_chunk));
-//                            getDirectoryListingEntryList().add(dle);
-//                        } else
-//                            setPlaceHolder(placeHolder + 1);
-//
-//                    } while (nextEntry(dir_chunk));
-//                }
-            }
-
-//        } catch (Exception e) {
-//            e.printStackTrace();
-//        }
-    }
-
-
-    /**
-     * Returns encrypted integer
-     * 
-     * @param data_chunk
-     * 
-     * @return
-     */
-    private int getEncint(byte[] data_chunk) {
-        byte ob;
-        BigInteger bi = BigInteger.ZERO;
-        byte[] nb = new byte[1];
-
-        if (placeHolder < data_chunk.length) {
-            while ((ob = data_chunk[placeHolder]) < 0) {
-                nb[0] = (byte) ((ob & 0x7f));
-                bi = bi.shiftLeft(7).add(new BigInteger(nb));
-                setPlaceHolder(placeHolder + 1);
-            }
-            nb[0] = (byte) ((ob & 0x7f));
-            bi = bi.shiftLeft(7).add(new BigInteger(nb));
-            setPlaceHolder(placeHolder + 1);
-        }
-        return bi.intValue();
-    }
-
-    /**
-     * Sets chm directory listing entry list
-     * 
-     * @param dlel
-     *            chm directory listing entry list
-     */
-    public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
-        this.dlel = dlel;
-    }
-
-    /**
-     * Returns chm directory listing entry list
-     * 
-     * @return List<DirectoryListingEntry>
-     */
-    public List<DirectoryListingEntry> getDirectoryListingEntryList() {
-        return dlel;
-    }
-
-    /**
-     * Sets data
-     * 
-     * @param data
-     */
-    private void setData(byte[] data) {
-        this.data = data;
-    }
-
-    /**
-     * Returns data
-     * 
-     * @return
-     */
-    private byte[] getData() {
-        return data;
-    }
-
-    /**
-     * Sets data offset
-     * 
-     * @param dataOffset
-     */
-    private void setDataOffset(long dataOffset) {
-        this.dataOffset = dataOffset;
-    }
-
-    /**
-     * Returns data offset
-     * 
-     * @return dataOffset
-     */
-    public long getDataOffset() {
-        return dataOffset;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.accessor;
+
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * Holds chm listing entries
+ */
+public class ChmDirectoryListingSet {
+    private List<DirectoryListingEntry> dlel;
+    private byte[] data;
+    private int placeHolder = -1;
+    private long dataOffset = -1;
+    private int controlDataIndex = -1;
+    private int resetTableIndex = -1;
+
+    private boolean isNotControlDataFound = true;
+    private boolean isNotResetTableFound = true;
+
+    /**
+     * Constructs chm directory listing set
+     * 
+     * @param data
+     *            byte[]
+     * @param chmItsHeader
+     * @param chmItspHeader
+     * @throws TikaException 
+     */
+    public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
+            ChmItspHeader chmItspHeader) throws TikaException {
+        setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
+        ChmCommons.assertByteArrayNotNull(data);
+        setData(data);
+        enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
+    }
+
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("list:=" + getDirectoryListingEntryList().toString()
+                + System.getProperty("line.separator"));
+        sb.append("number of list items:="
+                + getDirectoryListingEntryList().size());
+        return sb.toString();
+    }
+
+    /**
+     * Returns control data index that located in List
+     * 
+     * @return control data index
+     */
+    public int getControlDataIndex() {
+        return controlDataIndex;
+    }
+
+    /**
+     * Sets control data index
+     * 
+     * @param controlDataIndex
+     */
+    protected void setControlDataIndex(int controlDataIndex) {
+        this.controlDataIndex = controlDataIndex;
+    }
+
+    /**
+     * Return index of reset table
+     * 
+     * @return reset table index
+     */
+    public int getResetTableIndex() {
+        return resetTableIndex;
+    }
+
+    /**
+     * Sets reset table index
+     * 
+     * @param resetTableIndex
+     */
+    protected void setResetTableIndex(int resetTableIndex) {
+        this.resetTableIndex = resetTableIndex;
+    }
+
+    /**
+     * Sets place holder
+     * 
+     * @param placeHolder
+     */
+    private void setPlaceHolder(int placeHolder) {
+        this.placeHolder = placeHolder;
+    }
+
+    private ChmPmglHeader PMGLheader;
+    /**
+     * Enumerates chm directory listing entries
+     * 
+     * @param chmItsHeader
+     *            chm itsf PMGLheader
+     * @param chmItspHeader
+     *            chm itsp PMGLheader
+     */
+    private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
+            ChmItspHeader chmItspHeader) {
+        try {
+            int startPmgl = chmItspHeader.getIndex_head();
+            int stopPmgl = chmItspHeader.getUnknown_0024();
+            int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
+                    .getHeader_len());
+            setDataOffset(chmItsHeader.getDataOffset());
+
+            /* loops over all pmgls */
+            byte[] dir_chunk = null;
+            for (int i = startPmgl; i>=0; ) {
+                dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
+                int start = i * (int) chmItspHeader.getBlock_len() + dir_offset;
+                dir_chunk = ChmCommons
+                        .copyOfRange(getData(), start,
+                                start +(int) chmItspHeader.getBlock_len());
+
+                PMGLheader = new ChmPmglHeader();
+                PMGLheader.parse(dir_chunk, PMGLheader);
+                enumerateOneSegment(dir_chunk);
+                
+                i=PMGLheader.getBlockNext();
+                dir_chunk = null;
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            setData(null);
+        }
+    }
+
+    /**
+     * Checks control data
+     * 
+     * @param dle
+     *            chm directory listing entry
+     */
+    private void checkControlData(DirectoryListingEntry dle) {
+        if (isNotControlDataFound) {
+            if (dle.getName().contains(ChmConstants.CONTROL_DATA)) {
+                setControlDataIndex(getDirectoryListingEntryList().size());
+                isNotControlDataFound = false;
+            }
+        }
+    }
+
+    /**
+     * Checks reset table
+     * 
+     * @param dle
+     *            chm directory listing entry
+     */
+    private void checkResetTable(DirectoryListingEntry dle) {
+        if (isNotResetTableFound) {
+            if (dle.getName().contains(ChmConstants.RESET_TABLE)) {
+                setResetTableIndex(getDirectoryListingEntryList().size());
+                isNotResetTableFound = false;
+            }
+        }
+    }
+
+    public static final boolean startsWith(byte[] data, String prefix) {
+        for (int i=0; i<prefix.length(); i++) {
+            if (data[i]!=prefix.charAt(i)) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
+    /**
+     * Enumerates chm directory listing entries in single chm segment
+     * 
+     * @param dir_chunk
+     */
+    private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException {
+//        try {
+            if (dir_chunk != null) {
+                int header_len;
+                if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
+                    header_len = ChmConstants.CHM_PMGI_LEN;
+                    return; //skip PMGI
+                }
+                else if (startsWith(dir_chunk, ChmConstants.PMGL)) {
+                    header_len = ChmConstants.CHM_PMGL_LEN;
+                }
+                else {
+                    throw new ChmParsingException("Bad dir entry block.");
+                }
+
+                placeHolder = header_len;
+                //setPlaceHolder(header_len);
+                while (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()
+                        /*&& dir_chunk[placeHolder - 1] != 115*/) 
+                {
+                    //get entry name length
+                    int strlen = 0;// = getEncint(data);
+                    byte temp;
+                    while ((temp=dir_chunk[placeHolder++]) >= 0x80)
+                    {
+                        strlen <<= 7;
+                        strlen += temp & 0x7f;
+                    }
+
+                    strlen = (strlen << 7) + temp & 0x7f;
+                    
+                    if (strlen>dir_chunk.length) {
+                        throw new ChmParsingException("Bad data of a string length.");
+                    }
+                    
+                    DirectoryListingEntry dle = new DirectoryListingEntry();
+                    dle.setNameLength(strlen);
+                    dle.setName(new String(ChmCommons.copyOfRange(
+                                dir_chunk, placeHolder,
+                                (placeHolder + dle.getNameLength())), UTF_8));
+
+                    checkControlData(dle);
+                    checkResetTable(dle);
+                    setPlaceHolder(placeHolder
+                            + dle.getNameLength());
+
+                    /* Sets entry type */
+                    if (placeHolder < dir_chunk.length
+                            && dir_chunk[placeHolder] == 0)
+                        dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
+                    else
+                        dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
+
+                    setPlaceHolder(placeHolder + 1);
+                    dle.setOffset(getEncint(dir_chunk));
+                    dle.setLength(getEncint(dir_chunk));
+                    getDirectoryListingEntryList().add(dle);
+                }
+                
+//                int indexWorkData = ChmCommons.indexOf(dir_chunk,
+//                        "::".getBytes(UTF_8));
+//                int indexUserData = ChmCommons.indexOf(dir_chunk,
+//                        "/".getBytes(UTF_8));
+//
+//                if (indexUserData>=0 && indexUserData < indexWorkData)
+//                    setPlaceHolder(indexUserData);
+//                else if (indexWorkData>=0) {
+//                    setPlaceHolder(indexWorkData);
+//                }
+//                else {
+//                    setPlaceHolder(indexUserData);
+//                }
+//
+//                if (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()
+//                        && dir_chunk[placeHolder - 1] != 115) {// #{
+//                    do {
+//                        if (dir_chunk[placeHolder - 1] > 0) {
+//                            DirectoryListingEntry dle = new DirectoryListingEntry();
+//
+//                            // two cases: 1. when dir_chunk[placeHolder -
+//                            // 1] == 0x73
+//                            // 2. when dir_chunk[placeHolder + 1] == 0x2f
+//                            doNameCheck(dir_chunk, dle);
+//
+//                            // dle.setName(new
+//                            // String(Arrays.copyOfRange(dir_chunk,
+//                            // placeHolder, (placeHolder +
+//                            // dle.getNameLength()))));
+//                            dle.setName(new String(ChmCommons.copyOfRange(
+//                                    dir_chunk, placeHolder,
+//                                    (placeHolder + dle.getNameLength())), UTF_8));
+//                            checkControlData(dle);
+//                            checkResetTable(dle);
+//                            setPlaceHolder(placeHolder
+//                                    + dle.getNameLength());
+//
+//                            /* Sets entry type */
+//                            if (placeHolder < dir_chunk.length
+//                                    && dir_chunk[placeHolder] == 0)
+//                                dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
+//                            else
+//                                dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
+//
+//                            setPlaceHolder(placeHolder + 1);
+//                            dle.setOffset(getEncint(dir_chunk));
+//                            dle.setLength(getEncint(dir_chunk));
+//                            getDirectoryListingEntryList().add(dle);
+//                        } else
+//                            setPlaceHolder(placeHolder + 1);
+//
+//                    } while (nextEntry(dir_chunk));
+//                }
+            }
+
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//        }
+    }
+
+
+    /**
+     * Returns encrypted integer
+     * 
+     * @param data_chunk
+     * 
+     * @return
+     */
+    private int getEncint(byte[] data_chunk) {
+        byte ob;
+        BigInteger bi = BigInteger.ZERO;
+        byte[] nb = new byte[1];
+
+        if (placeHolder < data_chunk.length) {
+            while ((ob = data_chunk[placeHolder]) < 0) {
+                nb[0] = (byte) ((ob & 0x7f));
+                bi = bi.shiftLeft(7).add(new BigInteger(nb));
+                setPlaceHolder(placeHolder + 1);
+            }
+            nb[0] = (byte) ((ob & 0x7f));
+            bi = bi.shiftLeft(7).add(new BigInteger(nb));
+            setPlaceHolder(placeHolder + 1);
+        }
+        return bi.intValue();
+    }
+
+    /**
+     * Sets chm directory listing entry list
+     * 
+     * @param dlel
+     *            chm directory listing entry list
+     */
+    public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
+        this.dlel = dlel;
+    }
+
+    /**
+     * Returns chm directory listing entry list
+     * 
+     * @return List<DirectoryListingEntry>
+     */
+    public List<DirectoryListingEntry> getDirectoryListingEntryList() {
+        return dlel;
+    }
+
+    /**
+     * Sets data
+     * 
+     * @param data
+     */
+    private void setData(byte[] data) {
+        this.data = data;
+    }
+
+    /**
+     * Returns data
+     * 
+     * @return
+     */
+    private byte[] getData() {
+        return data;
+    }
+
+    /**
+     * Sets data offset
+     * 
+     * @param dataOffset
+     */
+    private void setDataOffset(long dataOffset) {
+        this.dataOffset = dataOffset;
+    }
+
+    /**
+     * Returns data offset
+     * 
+     * @return dataOffset
+     */
+    public long getDataOffset() {
+        return dataOffset;
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
index a231e14..2c4dc4e 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
@@ -1,492 +1,492 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.accessor;
-
-import java.math.BigInteger;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.assertion.ChmAssert;
-import org.apache.tika.parser.chm.core.ChmConstants;
-import org.apache.tika.parser.chm.exception.ChmParsingException;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-/**
- * The Header 0000: char[4] 'ITSF' 0004: DWORD 3 (Version number) 0008: DWORD
- * Total header length, including header section table and following data. 000C:
- * DWORD 1 (unknown) 0010: DWORD a timestamp 0014: DWORD Windows Language ID
- * 0018: GUID {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} 0028: GUID
- * {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} Note: a GUID is $10 bytes, arranged
- * as 1 DWORD, 2 WORDs, and 8 BYTEs. 0000: QWORD Offset of section from
- * beginning of file 0008: QWORD Length of section Following the header section
- * table is 8 bytes of additional header data. In Version 2 files, this data is
- * not there and the content section starts immediately after the directory.
- * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1}
- * 
- */
-/* structure of ITSF headers */
-public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
-    private static final long serialVersionUID = 2215291838533213826L;
-    private byte[] signature;
-    private int version; /* 4 */
-    private int header_len; /* 8 */
-    private int unknown_000c; /* c */
-    private long last_modified; /* 10 */
-    private long lang_id; /* 14 */
-    private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
-    private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
-    private long unknown_offset; /* 38 */
-    private long unknown_len; /* 40 */
-    private long dir_offset; /* 48 */
-    private long dir_len; /* 50 */
-    private long data_offset; /* 58 (Not present before V3) */
-
-    /* local usage */
-    private int dataRemained;
-    private int currentPlace = 0;
-
-    public ChmItsfHeader() {
-        signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */
-    }
-
-    /**
-     * Prints the values of ChmfHeader
-     */
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        sb.append(new String(getSignature(), UTF_8) + " ");
-        sb.append(getVersion() + " ");
-        sb.append(getHeaderLen() + " ");
-        sb.append(getUnknown_000c() + " ");
-        sb.append(getLastModified() + " ");
-        sb.append(getLangId() + " ");
-        sb.append(getDir_uuid() + " ");
-        sb.append(getStream_uuid() + " ");
-        sb.append(getUnknownOffset() + " ");
-        sb.append(getUnknownLen() + " ");
-        sb.append(getDirOffset() + " ");
-        sb.append(getDirLen() + " ");
-        sb.append(getDataOffset() + " ");
-        return sb.toString();
-    }
-
-    /**
-     * Returns a signature of itsf header
-     * 
-     * @return itsf header
-     */
-    public byte[] getSignature() {
-        return signature;
-    }
-
-    /**
-     * Sets itsf header signature
-     * 
-     * @param signature
-     */
-    protected void setSignature(byte[] signature) {
-        this.signature = signature;
-    }
-
-    /**
-     * Returns itsf header version
-     * 
-     * @return itsf version
-     */
-    public int getVersion() {
-        return version;
-    }
-
-    /**
-     * Sets itsf version
-     * 
-     * @param version
-     */
-    protected void setVersion(int version) {
-        this.version = version;
-    }
-
-    /**
-     * Returns itsf header length
-     * 
-     * @return length
-     */
-    public int getHeaderLen() {
-        return header_len;
-    }
-
-    /**
-     * Sets itsf header length
-     * 
-     * @param header_len
-     */
-    protected void setHeaderLen(int header_len) {
-        this.header_len = header_len;
-    }
-
-    /**
-     * Returns unknown_00c value
-     * 
-     * @return unknown_00c
-     */
-    public int getUnknown_000c() {
-        return unknown_000c;
-    }
-
-    /**
-     * Sets unknown_00c
-     * 
-     * @param unknown_000c
-     */
-    protected void setUnknown_000c(int unknown_000c) {
-        this.unknown_000c = unknown_000c;
-    }
-
-    /**
-     * Returns last modified date of the chm file
-     * 
-     * @return last modified date as long
-     */
-    public long getLastModified() {
-        return last_modified;
-    }
-
-    /**
-     * Sets last modified date of the chm file
-     * 
-     * @param last_modified
-     */
-    protected void setLastModified(long last_modified) {
-        this.last_modified = last_modified;
-    }
-
-    /**
-     * Returns language ID
-     * 
-     * @return language_id
-     */
-    public long getLangId() {
-        return lang_id;
-    }
-
-    /**
-     * Sets language_id
-     * 
-     * @param lang_id
-     */
-    protected void setLangId(long lang_id) {
-        this.lang_id = lang_id;
-    }
-
-    /**
-     * Returns directory uuid
-     * 
-     * @return dir_uuid
-     */
-    public byte[] getDir_uuid() {
-        return dir_uuid;
-    }
-
-    /**
-     * Sets directory uuid
-     * 
-     * @param dir_uuid
-     */
-    protected void setDir_uuid(byte[] dir_uuid) {
-        this.dir_uuid = dir_uuid;
-    }
-
-    /**
-     * Returns stream uuid
-     * 
-     * @return stream_uuid
-     */
-    public byte[] getStream_uuid() {
-        return stream_uuid;
-    }
-
-    /**
-     * Sets stream uuid
-     * 
-     * @param stream_uuid
-     */
-    protected void setStream_uuid(byte[] stream_uuid) {
-        this.stream_uuid = stream_uuid;
-    }
-
-    /**
-     * Returns unknown offset
-     * 
-     * @return unknown_offset
-     */
-    public long getUnknownOffset() {
-        return unknown_offset;
-    }
-
-    /**
-     * Sets unknown offset
-     * 
-     * @param unknown_offset
-     */
-    protected void setUnknownOffset(long unknown_offset) {
-        this.unknown_offset = unknown_offset;
-    }
-
-    /**
-     * Returns unknown length
-     * 
-     * @return unknown_length
-     */
-    public long getUnknownLen() {
-        return unknown_len;
-    }
-
-    /**
-     * Sets unknown length
-     * 
-     * @param unknown_len
-     */
-    protected void setUnknownLen(long unknown_len) {
-        this.unknown_len = unknown_len;
-    }
-
-    /**
-     * Returns directory offset
-     * 
-     * @return directory_offset
-     */
-    public long getDirOffset() {
-        return dir_offset;
-    }
-
-    /**
-     * Sets directory offset
-     * 
-     * @param dir_offset
-     */
-    protected void setDirOffset(long dir_offset) {
-        this.dir_offset = dir_offset;
-    }
-
-    /**
-     * Returns directory length
-     * 
-     * @return directory_offset
-     */
-    public long getDirLen() {
-        return dir_len;
-    }
-
-    /**
-     * Sets directory length
-     * 
-     * @param dir_len
-     */
-    protected void setDirLen(long dir_len) {
-        this.dir_len = dir_len;
-    }
-
-    /**
-     * Returns data offset
-     * 
-     * @return data_offset
-     */
-    public long getDataOffset() {
-        return data_offset;
-    }
-
-    /**
-     * Sets data offset
-     * 
-     * @param data_offset
-     */
-    protected void setDataOffset(long data_offset) {
-        this.data_offset = data_offset;
-    }
-
-    /**
-     * Copies 4 first bytes of the byte[]
-     * 
-     * @param data
-     * @param chmItsfHeader
-     * @param count
-     * @throws TikaException 
-     */
-    private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
-            int count) throws TikaException {
-        ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
-        System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
-        this.setCurrentPlace(this.getCurrentPlace() + count);
-        this.setDataRemained(this.getDataRemained() - count);
-    }
-
-    /**
-     * Copies X bytes of source byte[] to the dest byte[]
-     * 
-     * @param data
-     * @param dest
-     * @param count
-     * @return
-     */
-    private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
-        System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
-        this.setCurrentPlace(this.getCurrentPlace() + count);
-        this.setDataRemained(this.getDataRemained() - count);
-        return dest;
-    }
-
-    /**
-     * Takes 8 bytes and reverses them
-     * 
-     * @param data
-     * @param dest
-     * @return
-     * @throws TikaException 
-     */
-    private long unmarshalUint64(byte[] data, long dest) throws TikaException{
-        byte[] temp = new byte[8];
-        int i, j;
-
-        if (8 > this.getDataRemained())
-            throw new TikaException("8 > this.getDataRemained()");
-
-        for (i = 8, j = 7; i > 0; i--) {
-            temp[j--] = data[this.getCurrentPlace()];
-            this.setCurrentPlace(this.getCurrentPlace() + 1);
-        }
-
-        dest = new BigInteger(temp).longValue();
-        this.setDataRemained(this.getDataRemained() - 8);
-        return dest;
-    }
-
-    private int unmarshalInt32(byte[] data, int dest) throws TikaException{
-        ChmAssert.assertByteArrayNotNull(data);
-
-        if (4 > this.getDataRemained())
-            throw new TikaException("4 > dataLenght");
-        dest = (data[this.getCurrentPlace()] & 0xff)
-                | (data[this.getCurrentPlace() + 1] & 0xff) << 8
-                | (data[this.getCurrentPlace() + 2] & 0xff) << 16
-                | (data[this.getCurrentPlace() + 3] & 0xff) << 24;
-
-        this.setCurrentPlace(this.getCurrentPlace() + 4);
-        this.setDataRemained(this.getDataRemained() - 4);
-        return dest;
-    }
-
-    private long unmarshalUInt32(byte[] data, long dest) throws TikaException{
-        ChmAssert.assertByteArrayNotNull(data);
-        if (4 > getDataRemained())
-            throw new TikaException("4 > dataLenght");
-        dest = data[this.getCurrentPlace()]
-                | data[this.getCurrentPlace() + 1] << 8
-                | data[this.getCurrentPlace() + 2] << 16
-                | data[this.getCurrentPlace() + 3] << 24;
-
-        setDataRemained(this.getDataRemained() - 4);
-        this.setCurrentPlace(this.getCurrentPlace() + 4);
-        return dest;
-    }
-
-    public static void main(String[] args) {
-    }
-
-    /**
-     * Sets data remained to be processed
-     * 
-     * @param dataRemained
-     */
-    private void setDataRemained(int dataRemained) {
-        this.dataRemained = dataRemained;
-    }
-
-    /**
-     * Returns data remained
-     * 
-     * @return data_remainned
-     */
-    private int getDataRemained() {
-        return dataRemained;
-    }
-
-    /**
-     * Sets current place in the byte[]
-     * 
-     * @param currentPlace
-     */
-    private void setCurrentPlace(int currentPlace) {
-        this.currentPlace = currentPlace;
-    }
-
-    /**
-     * Returns current place in the byte[]
-     * 
-     * @return current place
-     */
-    private int getCurrentPlace() {
-        return currentPlace;
-    }
-
-    // @Override
-    public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
-        if (data.length < ChmConstants.CHM_ITSF_V2_LEN
-                || data.length > ChmConstants.CHM_ITSF_V3_LEN)
-            throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
-
-        chmItsfHeader.setDataRemained(data.length);
-        chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
-        chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
-        chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
-        chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
-        chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
-        chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));
-        chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
-        chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));
-        chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
-        chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
-        chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
-        chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));
-        if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF))
-            throw new TikaException("seems not valid file");
-        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
-            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
-                throw new TikaException("something wrong with header");
-        } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
-            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
-                throw new TikaException("unknown v3 header lenght");
-        } else
-            throw new ChmParsingException("unsupported chm format");
-
-        /*
-         * now, if we have a V3 structure, unmarshal the rest, otherwise,
-         * compute it
-         */
-        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
-            if (chmItsfHeader.getDataRemained() >= 0)
-                chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
-                        + chmItsfHeader.getDirLen());
-            else
-                throw new TikaException("cannot set data offset, no data remained");
-        } else
-            chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
-                    + chmItsfHeader.getDirLen());
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.accessor;
+
+import java.math.BigInteger;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * Structure of the ITSF header of a CHM file.
+ * <p>
+ * The header (offsets are hexadecimal):
+ * <pre>
+ * 0000: char[4] 'ITSF'
+ * 0004: DWORD   3 (Version number)
+ * 0008: DWORD   Total header length, including header section table and
+ *               following data.
+ * 000C: DWORD   1 (unknown)
+ * 0010: DWORD   a timestamp
+ * 0014: DWORD   Windows Language ID
+ * 0018: GUID    {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}
+ * 0028: GUID    {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}
+ * </pre>
+ * Note: a GUID is $10 (i.e. 16) bytes, arranged as 1 DWORD, 2 WORDs, and
+ * 8 BYTEs.
+ * <p>
+ * Each entry of the header section table:
+ * <pre>
+ * 0000: QWORD   Offset of section from beginning of file
+ * 0008: QWORD   Length of section
+ * </pre>
+ * Following the header section table is 8 bytes of additional header data.
+ * In Version 2 files, this data is not there and the content section starts
+ * immediately after the directory.
+ *
+ * @see <a href="http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?show-translation-form=1">
+ *      Microsoft's HTML Help (.chm) format (incomplete)</a>
+ */
+/* structure of ITSF headers */
+public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
+    private static final long serialVersionUID = 2215291838533213826L;
+    private byte[] signature;
+    private int version; /* 4 */
+    private int header_len; /* 8 */
+    private int unknown_000c; /* c */
+    private long last_modified; /* 10 */
+    private long lang_id; /* 14 */
+    private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
+    private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
+    private long unknown_offset; /* 38 */
+    private long unknown_len; /* 40 */
+    private long dir_offset; /* 48 */
+    private long dir_len; /* 50 */
+    private long data_offset; /* 58 (Not present before V3) */
+
+    /* local usage */
+    private int dataRemained;
+    private int currentPlace = 0;
+
+    public ChmItsfHeader() {
+        signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */
+    }
+
+    /**
+     * Returns the values of this ITSF header as a single line. Every value is
+     * followed by one space, in this order: signature, version, header
+     * length, unknown_000c, last modified, language id, directory uuid,
+     * stream uuid, unknown offset, unknown length, directory offset,
+     * directory length and data offset.
+     * <p>
+     * The two uuid byte arrays are rendered with
+     * {@link java.util.Arrays#toString(byte[])}. Concatenating a
+     * {@code byte[]} with a string only prints the array's type and identity
+     * hash, not its content.
+     *
+     * @return textual representation of the header values
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(new String(getSignature(), UTF_8)).append(" ");
+        sb.append(getVersion()).append(" ");
+        sb.append(getHeaderLen()).append(" ");
+        sb.append(getUnknown_000c()).append(" ");
+        sb.append(getLastModified()).append(" ");
+        sb.append(getLangId()).append(" ");
+        // print the uuid bytes themselves rather than "[B@<hash>"
+        sb.append(java.util.Arrays.toString(getDir_uuid())).append(" ");
+        sb.append(java.util.Arrays.toString(getStream_uuid())).append(" ");
+        sb.append(getUnknownOffset()).append(" ");
+        sb.append(getUnknownLen()).append(" ");
+        sb.append(getDirOffset()).append(" ");
+        sb.append(getDirLen()).append(" ");
+        sb.append(getDataOffset()).append(" ");
+        return sb.toString();
+    }
+
+    /**
+     * Returns the signature bytes of the itsf header (header offset 0).
+     * The stored array itself is returned, not a copy.
+     *
+     * @return itsf header signature bytes
+     */
+    public byte[] getSignature() {
+        return signature;
+    }
+
+    /**
+     * Sets the itsf header signature. The array is stored by reference.
+     *
+     * @param signature
+     *            signature bytes to store
+     */
+    protected void setSignature(byte[] signature) {
+        this.signature = signature;
+    }
+
+    /**
+     * Returns the itsf header version (header offset 0x04).
+     *
+     * @return itsf version
+     */
+    public int getVersion() {
+        return version;
+    }
+
+    /**
+     * Sets the itsf header version.
+     *
+     * @param version
+     *            version number to store
+     */
+    protected void setVersion(int version) {
+        this.version = version;
+    }
+
+    /**
+     * Returns the itsf header length (header offset 0x08).
+     *
+     * @return header length
+     */
+    public int getHeaderLen() {
+        return header_len;
+    }
+
+    /**
+     * Sets the itsf header length.
+     *
+     * @param header_len
+     *            header length to store
+     */
+    protected void setHeaderLen(int header_len) {
+        this.header_len = header_len;
+    }
+
+    /**
+     * Returns the value of the unknown field at header offset 0x0c.
+     *
+     * @return unknown_000c value
+     */
+    public int getUnknown_000c() {
+        return unknown_000c;
+    }
+
+    /**
+     * Sets the value of the unknown field at header offset 0x0c.
+     *
+     * @param unknown_000c
+     *            value to store
+     */
+    protected void setUnknown_000c(int unknown_000c) {
+        this.unknown_000c = unknown_000c;
+    }
+
+    /**
+     * Returns the last modified timestamp of the chm file (header offset
+     * 0x10) as the raw numeric value stored in the header.
+     *
+     * @return last modified value as long
+     */
+    public long getLastModified() {
+        return last_modified;
+    }
+
+    /**
+     * Sets the last modified timestamp of the chm file.
+     *
+     * @param last_modified
+     *            timestamp value to store
+     */
+    protected void setLastModified(long last_modified) {
+        this.last_modified = last_modified;
+    }
+
+    /**
+     * Returns the language ID (header offset 0x14).
+     *
+     * @return language id
+     */
+    public long getLangId() {
+        return lang_id;
+    }
+
+    /**
+     * Sets the language ID.
+     *
+     * @param lang_id
+     *            language id to store
+     */
+    protected void setLangId(long lang_id) {
+        this.lang_id = lang_id;
+    }
+
+    /**
+     * Returns the directory uuid bytes (header offset 0x18).
+     * The stored array itself is returned, not a copy.
+     *
+     * @return directory uuid bytes
+     */
+    public byte[] getDir_uuid() {
+        return dir_uuid;
+    }
+
+    /**
+     * Sets the directory uuid. The array is stored by reference.
+     *
+     * @param dir_uuid
+     *            directory uuid bytes to store
+     */
+    protected void setDir_uuid(byte[] dir_uuid) {
+        this.dir_uuid = dir_uuid;
+    }
+
+    /**
+     * Returns the stream uuid bytes (header offset 0x28).
+     * The stored array itself is returned, not a copy.
+     *
+     * @return stream uuid bytes
+     */
+    public byte[] getStream_uuid() {
+        return stream_uuid;
+    }
+
+    /**
+     * Sets the stream uuid. The array is stored by reference.
+     *
+     * @param stream_uuid
+     *            stream uuid bytes to store
+     */
+    protected void setStream_uuid(byte[] stream_uuid) {
+        this.stream_uuid = stream_uuid;
+    }
+
+    /**
+     * Returns the unknown offset (header offset 0x38).
+     *
+     * @return unknown offset
+     */
+    public long getUnknownOffset() {
+        return unknown_offset;
+    }
+
+    /**
+     * Sets the unknown offset.
+     *
+     * @param unknown_offset
+     *            offset value to store
+     */
+    protected void setUnknownOffset(long unknown_offset) {
+        this.unknown_offset = unknown_offset;
+    }
+
+    /**
+     * Returns the unknown length (header offset 0x40).
+     *
+     * @return unknown length
+     */
+    public long getUnknownLen() {
+        return unknown_len;
+    }
+
+    /**
+     * Sets the unknown length.
+     *
+     * @param unknown_len
+     *            length value to store
+     */
+    protected void setUnknownLen(long unknown_len) {
+        this.unknown_len = unknown_len;
+    }
+
+    /**
+     * Returns the directory offset (header offset 0x48).
+     *
+     * @return directory offset
+     */
+    public long getDirOffset() {
+        return dir_offset;
+    }
+
+    /**
+     * Sets the directory offset.
+     *
+     * @param dir_offset
+     *            directory offset to store
+     */
+    protected void setDirOffset(long dir_offset) {
+        this.dir_offset = dir_offset;
+    }
+
+    /**
+     * Returns the directory length (header offset 0x50).
+     *
+     * @return directory length
+     */
+    public long getDirLen() {
+        return dir_len;
+    }
+
+    /**
+     * Sets the directory length.
+     *
+     * @param dir_len
+     *            directory length to store
+     */
+    protected void setDirLen(long dir_len) {
+        this.dir_len = dir_len;
+    }
+
+    /**
+     * Returns the data offset (header offset 0x58, not present before V3).
+     * Within this class, {@code parse} assigns it as directory offset plus
+     * directory length.
+     *
+     * @return data offset
+     */
+    public long getDataOffset() {
+        return data_offset;
+    }
+
+    /**
+     * Sets the data offset.
+     *
+     * @param data_offset
+     *            data offset to store
+     */
+    protected void setDataOffset(long data_offset) {
+        this.data_offset = data_offset;
+    }
+
+    /**
+     * Copies 4 first bytes of the byte[]
+     * 
+     * @param data
+     * @param chmItsfHeader
+     * @param count
+     * @throws TikaException 
+     */
+    private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
+            int count) throws TikaException {
+        ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
+        System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
+        this.setCurrentPlace(this.getCurrentPlace() + count);
+        this.setDataRemained(this.getDataRemained() - count);
+    }
+
+    /**
+     * Copies X bytes of source byte[] to the dest byte[]
+     * 
+     * @param data
+     * @param dest
+     * @param count
+     * @return
+     */
+    private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
+        System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
+        this.setCurrentPlace(this.getCurrentPlace() + count);
+        this.setDataRemained(this.getDataRemained() - count);
+        return dest;
+    }
+
+    /**
+     * Takes 8 bytes and reverses them
+     * 
+     * @param data
+     * @param dest
+     * @return
+     * @throws TikaException 
+     */
+    private long unmarshalUint64(byte[] data, long dest) throws TikaException{
+        byte[] temp = new byte[8];
+        int i, j;
+
+        if (8 > this.getDataRemained())
+            throw new TikaException("8 > this.getDataRemained()");
+
+        for (i = 8, j = 7; i > 0; i--) {
+            temp[j--] = data[this.getCurrentPlace()];
+            this.setCurrentPlace(this.getCurrentPlace() + 1);
+        }
+
+        dest = new BigInteger(temp).longValue();
+        this.setDataRemained(this.getDataRemained() - 8);
+        return dest;
+    }
+
+    private int unmarshalInt32(byte[] data, int dest) throws TikaException{
+        ChmAssert.assertByteArrayNotNull(data);
+
+        if (4 > this.getDataRemained())
+            throw new TikaException("4 > dataLenght");
+        dest = (data[this.getCurrentPlace()] & 0xff)
+                | (data[this.getCurrentPlace() + 1] & 0xff) << 8
+                | (data[this.getCurrentPlace() + 2] & 0xff) << 16
+                | (data[this.getCurrentPlace() + 3] & 0xff) << 24;
+
+        this.setCurrentPlace(this.getCurrentPlace() + 4);
+        this.setDataRemained(this.getDataRemained() - 4);
+        return dest;
+    }
+
+    private long unmarshalUInt32(byte[] data, long dest) throws TikaException{
+        ChmAssert.assertByteArrayNotNull(data);
+        if (4 > getDataRemained())
+            throw new TikaException("4 > dataLenght");
+        dest = data[this.getCurrentPlace()]
+                | data[this.getCurrentPlace() + 1] << 8
+                | data[this.getCurrentPlace() + 2] << 16
+                | data[this.getCurrentPlace() + 3] << 24;
+
+        setDataRemained(this.getDataRemained() - 4);
+        this.setCurrentPlace(this.getCurrentPlace() + 4);
+        return dest;
+    }
+
+    public static void main(String[] args) { // empty entry point: the body contains no statements and args is unused
+    }
+
+    /**
+     * Sets the number of bytes that remain to be processed. The unmarshal
+     * methods of this class decrement it as they consume input.
+     *
+     * @param dataRemained
+     *            remaining byte count to store
+     */
+    private void setDataRemained(int dataRemained) {
+        this.dataRemained = dataRemained;
+    }
+
+    /**
+     * Returns the number of bytes that remain to be processed.
+     *
+     * @return remaining byte count
+     */
+    private int getDataRemained() {
+        return dataRemained;
+    }
+
+    /**
+     * Sets the current place (read index) in the byte[] being unmarshalled.
+     *
+     * @param currentPlace
+     *            index to store
+     */
+    private void setCurrentPlace(int currentPlace) {
+        this.currentPlace = currentPlace;
+    }
+
+    /**
+     * Returns the current place (read index) in the byte[] being
+     * unmarshalled.
+     *
+     * @return current place
+     */
+    private int getCurrentPlace() {
+        return currentPlace;
+    }
+
+    // @Override
+    public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
+        if (data.length < ChmConstants.CHM_ITSF_V2_LEN
+                || data.length > ChmConstants.CHM_ITSF_V3_LEN)
+            throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
+
+        chmItsfHeader.setDataRemained(data.length);
+        chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
+        chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
+        chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
+        chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
+        chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
+        chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));
+        chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
+        chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));
+        chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
+        chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
+        chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
+        chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));
+        if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF))
+            throw new TikaException("seems not valid file");
+        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
+            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
+                throw new TikaException("something wrong with header");
+        } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
+            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
+                throw new TikaException("unknown v3 header lenght");
+        } else
+            throw new ChmParsingException("unsupported chm format");
+
+        /*
+         * now, if we have a V3 structure, unmarshal the rest, otherwise,
+         * compute it
+         */
+        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
+            if (chmItsfHeader.getDataRemained() >= 0)
+                chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
+                        + chmItsfHeader.getDirLen());
+            else
+                throw new TikaException("cannot set data offset, no data remained");
+        } else
+            chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
+                    + chmItsfHeader.getDirLen());
+    }
+}