You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:34 UTC
[28/39] tika git commit: Convert new lines from windows to unix
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
index 1630edd..9d0a2f0 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
@@ -1,398 +1,398 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.accessor;
-
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.core.ChmCommons;
-import org.apache.tika.parser.chm.core.ChmConstants;
-import org.apache.tika.parser.chm.exception.ChmParsingException;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-/**
- * Holds chm listing entries
- */
-public class ChmDirectoryListingSet {
- private List<DirectoryListingEntry> dlel;
- private byte[] data;
- private int placeHolder = -1;
- private long dataOffset = -1;
- private int controlDataIndex = -1;
- private int resetTableIndex = -1;
-
- private boolean isNotControlDataFound = true;
- private boolean isNotResetTableFound = true;
-
- /**
- * Constructs chm directory listing set
- *
- * @param data
- * byte[]
- * @param chmItsHeader
- * @param chmItspHeader
- * @throws TikaException
- */
- public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
- ChmItspHeader chmItspHeader) throws TikaException {
- setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
- ChmCommons.assertByteArrayNotNull(data);
- setData(data);
- enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
- }
-
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("list:=" + getDirectoryListingEntryList().toString()
- + System.getProperty("line.separator"));
- sb.append("number of list items:="
- + getDirectoryListingEntryList().size());
- return sb.toString();
- }
-
- /**
- * Returns control data index that located in List
- *
- * @return control data index
- */
- public int getControlDataIndex() {
- return controlDataIndex;
- }
-
- /**
- * Sets control data index
- *
- * @param controlDataIndex
- */
- protected void setControlDataIndex(int controlDataIndex) {
- this.controlDataIndex = controlDataIndex;
- }
-
- /**
- * Return index of reset table
- *
- * @return reset table index
- */
- public int getResetTableIndex() {
- return resetTableIndex;
- }
-
- /**
- * Sets reset table index
- *
- * @param resetTableIndex
- */
- protected void setResetTableIndex(int resetTableIndex) {
- this.resetTableIndex = resetTableIndex;
- }
-
- /**
- * Sets place holder
- *
- * @param placeHolder
- */
- private void setPlaceHolder(int placeHolder) {
- this.placeHolder = placeHolder;
- }
-
- private ChmPmglHeader PMGLheader;
- /**
- * Enumerates chm directory listing entries
- *
- * @param chmItsHeader
- * chm itsf PMGLheader
- * @param chmItspHeader
- * chm itsp PMGLheader
- */
- private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
- ChmItspHeader chmItspHeader) {
- try {
- int startPmgl = chmItspHeader.getIndex_head();
- int stopPmgl = chmItspHeader.getUnknown_0024();
- int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
- .getHeader_len());
- setDataOffset(chmItsHeader.getDataOffset());
-
- /* loops over all pmgls */
- byte[] dir_chunk = null;
- for (int i = startPmgl; i>=0; ) {
- dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
- int start = i * (int) chmItspHeader.getBlock_len() + dir_offset;
- dir_chunk = ChmCommons
- .copyOfRange(getData(), start,
- start +(int) chmItspHeader.getBlock_len());
-
- PMGLheader = new ChmPmglHeader();
- PMGLheader.parse(dir_chunk, PMGLheader);
- enumerateOneSegment(dir_chunk);
-
- i=PMGLheader.getBlockNext();
- dir_chunk = null;
- }
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- setData(null);
- }
- }
-
- /**
- * Checks control data
- *
- * @param dle
- * chm directory listing entry
- */
- private void checkControlData(DirectoryListingEntry dle) {
- if (isNotControlDataFound) {
- if (dle.getName().contains(ChmConstants.CONTROL_DATA)) {
- setControlDataIndex(getDirectoryListingEntryList().size());
- isNotControlDataFound = false;
- }
- }
- }
-
- /**
- * Checks reset table
- *
- * @param dle
- * chm directory listing entry
- */
- private void checkResetTable(DirectoryListingEntry dle) {
- if (isNotResetTableFound) {
- if (dle.getName().contains(ChmConstants.RESET_TABLE)) {
- setResetTableIndex(getDirectoryListingEntryList().size());
- isNotResetTableFound = false;
- }
- }
- }
-
- public static final boolean startsWith(byte[] data, String prefix) {
- for (int i=0; i<prefix.length(); i++) {
- if (data[i]!=prefix.charAt(i)) {
- return false;
- }
- }
-
- return true;
- }
- /**
- * Enumerates chm directory listing entries in single chm segment
- *
- * @param dir_chunk
- */
- private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException {
-// try {
- if (dir_chunk != null) {
- int header_len;
- if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
- header_len = ChmConstants.CHM_PMGI_LEN;
- return; //skip PMGI
- }
- else if (startsWith(dir_chunk, ChmConstants.PMGL)) {
- header_len = ChmConstants.CHM_PMGL_LEN;
- }
- else {
- throw new ChmParsingException("Bad dir entry block.");
- }
-
- placeHolder = header_len;
- //setPlaceHolder(header_len);
- while (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()
- /*&& dir_chunk[placeHolder - 1] != 115*/)
- {
- //get entry name length
- int strlen = 0;// = getEncint(data);
- byte temp;
- while ((temp=dir_chunk[placeHolder++]) >= 0x80)
- {
- strlen <<= 7;
- strlen += temp & 0x7f;
- }
-
- strlen = (strlen << 7) + temp & 0x7f;
-
- if (strlen>dir_chunk.length) {
- throw new ChmParsingException("Bad data of a string length.");
- }
-
- DirectoryListingEntry dle = new DirectoryListingEntry();
- dle.setNameLength(strlen);
- dle.setName(new String(ChmCommons.copyOfRange(
- dir_chunk, placeHolder,
- (placeHolder + dle.getNameLength())), UTF_8));
-
- checkControlData(dle);
- checkResetTable(dle);
- setPlaceHolder(placeHolder
- + dle.getNameLength());
-
- /* Sets entry type */
- if (placeHolder < dir_chunk.length
- && dir_chunk[placeHolder] == 0)
- dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
- else
- dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
-
- setPlaceHolder(placeHolder + 1);
- dle.setOffset(getEncint(dir_chunk));
- dle.setLength(getEncint(dir_chunk));
- getDirectoryListingEntryList().add(dle);
- }
-
-// int indexWorkData = ChmCommons.indexOf(dir_chunk,
-// "::".getBytes(UTF_8));
-// int indexUserData = ChmCommons.indexOf(dir_chunk,
-// "/".getBytes(UTF_8));
-//
-// if (indexUserData>=0 && indexUserData < indexWorkData)
-// setPlaceHolder(indexUserData);
-// else if (indexWorkData>=0) {
-// setPlaceHolder(indexWorkData);
-// }
-// else {
-// setPlaceHolder(indexUserData);
-// }
-//
-// if (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace()
-// && dir_chunk[placeHolder - 1] != 115) {// #{
-// do {
-// if (dir_chunk[placeHolder - 1] > 0) {
-// DirectoryListingEntry dle = new DirectoryListingEntry();
-//
-// // two cases: 1. when dir_chunk[placeHolder -
-// // 1] == 0x73
-// // 2. when dir_chunk[placeHolder + 1] == 0x2f
-// doNameCheck(dir_chunk, dle);
-//
-// // dle.setName(new
-// // String(Arrays.copyOfRange(dir_chunk,
-// // placeHolder, (placeHolder +
-// // dle.getNameLength()))));
-// dle.setName(new String(ChmCommons.copyOfRange(
-// dir_chunk, placeHolder,
-// (placeHolder + dle.getNameLength())), UTF_8));
-// checkControlData(dle);
-// checkResetTable(dle);
-// setPlaceHolder(placeHolder
-// + dle.getNameLength());
-//
-// /* Sets entry type */
-// if (placeHolder < dir_chunk.length
-// && dir_chunk[placeHolder] == 0)
-// dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
-// else
-// dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
-//
-// setPlaceHolder(placeHolder + 1);
-// dle.setOffset(getEncint(dir_chunk));
-// dle.setLength(getEncint(dir_chunk));
-// getDirectoryListingEntryList().add(dle);
-// } else
-// setPlaceHolder(placeHolder + 1);
-//
-// } while (nextEntry(dir_chunk));
-// }
- }
-
-// } catch (Exception e) {
-// e.printStackTrace();
-// }
- }
-
-
- /**
- * Returns encrypted integer
- *
- * @param data_chunk
- *
- * @return
- */
- private int getEncint(byte[] data_chunk) {
- byte ob;
- BigInteger bi = BigInteger.ZERO;
- byte[] nb = new byte[1];
-
- if (placeHolder < data_chunk.length) {
- while ((ob = data_chunk[placeHolder]) < 0) {
- nb[0] = (byte) ((ob & 0x7f));
- bi = bi.shiftLeft(7).add(new BigInteger(nb));
- setPlaceHolder(placeHolder + 1);
- }
- nb[0] = (byte) ((ob & 0x7f));
- bi = bi.shiftLeft(7).add(new BigInteger(nb));
- setPlaceHolder(placeHolder + 1);
- }
- return bi.intValue();
- }
-
- /**
- * Sets chm directory listing entry list
- *
- * @param dlel
- * chm directory listing entry list
- */
- public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
- this.dlel = dlel;
- }
-
- /**
- * Returns chm directory listing entry list
- *
- * @return List<DirectoryListingEntry>
- */
- public List<DirectoryListingEntry> getDirectoryListingEntryList() {
- return dlel;
- }
-
- /**
- * Sets data
- *
- * @param data
- */
- private void setData(byte[] data) {
- this.data = data;
- }
-
- /**
- * Returns data
- *
- * @return
- */
- private byte[] getData() {
- return data;
- }
-
- /**
- * Sets data offset
- *
- * @param dataOffset
- */
- private void setDataOffset(long dataOffset) {
- this.dataOffset = dataOffset;
- }
-
- /**
- * Returns data offset
- *
- * @return dataOffset
- */
- public long getDataOffset() {
- return dataOffset;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.accessor;
+
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * Holds the directory listing entries of a chm file.
+ * <p>
+ * The constructor walks the chain of directory blocks found in the raw file
+ * bytes and collects one {@link DirectoryListingEntry} per directory entry.
+ * While doing so it records the list positions of the first entries whose
+ * names contain {@link ChmConstants#CONTROL_DATA} and
+ * {@link ChmConstants#RESET_TABLE}.
+ */
+public class ChmDirectoryListingSet {
+    private List<DirectoryListingEntry> dlel;
+    /* raw file bytes; released (set to null) once enumeration has finished */
+    private byte[] data;
+    /* read cursor inside the directory block that is currently being parsed */
+    private int placeHolder = -1;
+    private long dataOffset = -1;
+    private int controlDataIndex = -1;
+    private int resetTableIndex = -1;
+
+    private boolean isNotControlDataFound = true;
+    private boolean isNotResetTableFound = true;
+
+    /* header of the directory block that is currently being parsed */
+    private ChmPmglHeader pmglHeader;
+
+    /**
+     * Constructs chm directory listing set
+     *
+     * @param data
+     *            raw bytes the directory blocks are read from
+     * @param chmItsHeader
+     *            itsf header; supplies the directory and data offsets
+     * @param chmItspHeader
+     *            itsp header; supplies header length, block length and the
+     *            index of the first listing block
+     * @throws TikaException
+     *             declared for the null check performed on {@code data}
+     */
+    public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
+            ChmItspHeader chmItspHeader) throws TikaException {
+        setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
+        ChmCommons.assertByteArrayNotNull(data);
+        setData(data);
+        enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
+    }
+
+    /**
+     * Returns the entry list followed by the number of entries.
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("list:=").append(getDirectoryListingEntryList().toString())
+                .append(System.lineSeparator());
+        sb.append("number of list items:=")
+                .append(getDirectoryListingEntryList().size());
+        return sb.toString();
+    }
+
+    /**
+     * Returns control data index that located in List
+     *
+     * @return control data index, or -1 if it has not been set
+     */
+    public int getControlDataIndex() {
+        return controlDataIndex;
+    }
+
+    /**
+     * Sets control data index
+     *
+     * @param controlDataIndex
+     */
+    protected void setControlDataIndex(int controlDataIndex) {
+        this.controlDataIndex = controlDataIndex;
+    }
+
+    /**
+     * Return index of reset table
+     *
+     * @return reset table index, or -1 if it has not been set
+     */
+    public int getResetTableIndex() {
+        return resetTableIndex;
+    }
+
+    /**
+     * Sets reset table index
+     *
+     * @param resetTableIndex
+     */
+    protected void setResetTableIndex(int resetTableIndex) {
+        this.resetTableIndex = resetTableIndex;
+    }
+
+    /**
+     * Sets place holder
+     *
+     * @param placeHolder
+     */
+    private void setPlaceHolder(int placeHolder) {
+        this.placeHolder = placeHolder;
+    }
+
+    /**
+     * Enumerates chm directory listing entries by following the chain of
+     * directory blocks, starting at the block named by the itsp header and
+     * continuing with each block's "next block" index until it is negative.
+     * <p>
+     * Parsing is best effort: a failure is reported on stderr and the entries
+     * collected so far are kept. The raw data reference is always released.
+     *
+     * @param chmItsHeader
+     *            chm itsf header
+     * @param chmItspHeader
+     *            chm itsp header
+     */
+    private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
+            ChmItspHeader chmItspHeader) {
+        try {
+            int blockLen = (int) chmItspHeader.getBlock_len();
+            int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
+                    .getHeader_len());
+            setDataOffset(chmItsHeader.getDataOffset());
+
+            if (blockLen <= 0) {
+                throw new ChmParsingException("Bad directory block length: " + blockLen);
+            }
+
+            /*
+             * A well-formed chain visits every block at most once, so it can
+             * never be longer than the number of blocks that fit into the
+             * data. Anything longer means the chain loops back on itself.
+             */
+            int maxBlocks = getData().length / blockLen + 1;
+            int visitedBlocks = 0;
+
+            /* loops over all pmgls */
+            for (int i = chmItspHeader.getIndex_head(); i >= 0; i = pmglHeader.getBlockNext()) {
+                if (++visitedBlocks > maxBlocks) {
+                    throw new ChmParsingException(
+                            "Directory block chain is longer than the directory; cyclic chain suspected.");
+                }
+
+                int start = i * blockLen + dir_offset;
+                byte[] dir_chunk = ChmCommons.copyOfRange(getData(), start,
+                        start + blockLen);
+
+                pmglHeader = new ChmPmglHeader();
+                pmglHeader.parse(dir_chunk, pmglHeader);
+                enumerateOneSegment(dir_chunk);
+            }
+        } catch (Exception e) {
+            /* deliberate best effort: keep what was parsed before the failure */
+            e.printStackTrace();
+        } finally {
+            setData(null);
+        }
+    }
+
+    /**
+     * Records the list position of the first entry whose name contains the
+     * control data marker. Must be called before the entry is added to the
+     * list, because the current list size is used as the entry's index.
+     *
+     * @param dle
+     *            chm directory listing entry
+     */
+    private void checkControlData(DirectoryListingEntry dle) {
+        if (isNotControlDataFound
+                && dle.getName().contains(ChmConstants.CONTROL_DATA)) {
+            setControlDataIndex(getDirectoryListingEntryList().size());
+            isNotControlDataFound = false;
+        }
+    }
+
+    /**
+     * Records the list position of the first entry whose name contains the
+     * reset table marker. Must be called before the entry is added to the
+     * list, because the current list size is used as the entry's index.
+     *
+     * @param dle
+     *            chm directory listing entry
+     */
+    private void checkResetTable(DirectoryListingEntry dle) {
+        if (isNotResetTableFound
+                && dle.getName().contains(ChmConstants.RESET_TABLE)) {
+            setResetTableIndex(getDirectoryListingEntryList().size());
+            isNotResetTableFound = false;
+        }
+    }
+
+    /**
+     * Tells whether the leading bytes of {@code data} equal the characters of
+     * {@code prefix}, compared one byte against one char.
+     *
+     * @param data
+     *            bytes to inspect
+     * @param prefix
+     *            expected leading characters
+     * @return true if every character of the prefix matches; false on the
+     *         first mismatch or if {@code data} is null or shorter than the
+     *         prefix
+     */
+    public static final boolean startsWith(byte[] data, String prefix) {
+        if (data == null || data.length < prefix.length()) {
+            return false;
+        }
+        for (int i = 0; i < prefix.length(); i++) {
+            if (data[i] != prefix.charAt(i)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Enumerates chm directory listing entries in single chm segment.
+     * <p>
+     * Blocks starting with the PMGI marker are skipped. Blocks starting with
+     * the PMGL marker are read entry by entry: encoded name length, name
+     * bytes (decoded as UTF-8), one byte that selects the entry type, then
+     * the encoded offset and the encoded length.
+     *
+     * @param dir_chunk
+     *            bytes of one directory block; null is ignored
+     * @throws ChmParsingException
+     *             if the block carries neither marker or holds a bad length
+     */
+    private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException {
+        if (dir_chunk == null) {
+            return;
+        }
+        if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) {
+            return; //skip PMGI
+        }
+        if (!startsWith(dir_chunk, ChmConstants.PMGL)) {
+            throw new ChmParsingException("Bad dir entry block.");
+        }
+
+        /*
+         * Entries stop where the block's free space begins. The bound is
+         * clamped to the chunk length so that a negative free-space value
+         * cannot push the cursor beyond the array.
+         */
+        long entriesEnd = Math.min(dir_chunk.length,
+                dir_chunk.length - pmglHeader.getFreeSpace());
+
+        placeHolder = ChmConstants.CHM_PMGL_LEN;
+        while (placeHolder > 0 && placeHolder < entriesEnd) {
+            /*
+             * The name length is a variable-length integer like offset and
+             * length. Bytes are signed in Java, so the continuation bit has
+             * to be tested via the sign, which getEncint does.
+             */
+            int nameLength = getEncint(dir_chunk);
+
+            /* the name has to fit into what is left of the chunk */
+            if (nameLength < 0 || nameLength > dir_chunk.length - placeHolder) {
+                throw new ChmParsingException("Bad data of a string length.");
+            }
+
+            DirectoryListingEntry dle = new DirectoryListingEntry();
+            dle.setNameLength(nameLength);
+            dle.setName(new String(ChmCommons.copyOfRange(
+                    dir_chunk, placeHolder,
+                    placeHolder + nameLength), UTF_8));
+
+            /* both checks rely on the entry not being in the list yet */
+            checkControlData(dle);
+            checkResetTable(dle);
+            setPlaceHolder(placeHolder + nameLength);
+
+            /* Sets entry type */
+            if (placeHolder < dir_chunk.length
+                    && dir_chunk[placeHolder] == 0) {
+                dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
+            } else {
+                dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
+            }
+
+            setPlaceHolder(placeHolder + 1);
+            dle.setOffset(getEncint(dir_chunk));
+            dle.setLength(getEncint(dir_chunk));
+            getDirectoryListingEntryList().add(dle);
+        }
+    }
+
+    /**
+     * Reads one variable-length integer at the current place holder and
+     * moves the place holder behind it. Every byte contributes its low seven
+     * bits, most significant group first; a byte with the high bit set is
+     * followed by another byte. Only the low 32 bits of the value are kept.
+     *
+     * @param data_chunk
+     *            bytes to read from
+     * @return the decoded value, or 0 (without moving the place holder) if
+     *         the place holder already is at or behind the end of the chunk
+     * @throws ChmParsingException
+     *             if the chunk ends in the middle of a value
+     */
+    private int getEncint(byte[] data_chunk) throws ChmParsingException {
+        int value = 0;
+
+        if (placeHolder < data_chunk.length) {
+            byte ob;
+            /* negative byte == high bit set == more bytes follow */
+            while ((ob = data_chunk[placeHolder]) < 0) {
+                value = (value << 7) | (ob & 0x7f);
+                setPlaceHolder(placeHolder + 1);
+                if (placeHolder >= data_chunk.length) {
+                    throw new ChmParsingException("Bad data of an encoded integer.");
+                }
+            }
+            value = (value << 7) | (ob & 0x7f);
+            setPlaceHolder(placeHolder + 1);
+        }
+        return value;
+    }
+
+    /**
+     * Sets chm directory listing entry list
+     *
+     * @param dlel
+     *            chm directory listing entry list
+     */
+    public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
+        this.dlel = dlel;
+    }
+
+    /**
+     * Returns chm directory listing entry list
+     *
+     * @return List<DirectoryListingEntry>
+     */
+    public List<DirectoryListingEntry> getDirectoryListingEntryList() {
+        return dlel;
+    }
+
+    /**
+     * Sets data
+     *
+     * @param data
+     */
+    private void setData(byte[] data) {
+        this.data = data;
+    }
+
+    /**
+     * Returns data
+     *
+     * @return raw bytes, or null once enumeration has finished
+     */
+    private byte[] getData() {
+        return data;
+    }
+
+    /**
+     * Sets data offset
+     *
+     * @param dataOffset
+     */
+    private void setDataOffset(long dataOffset) {
+        this.dataOffset = dataOffset;
+    }
+
+    /**
+     * Returns data offset
+     *
+     * @return dataOffset as taken from the itsf header, or -1 if not set
+     */
+    public long getDataOffset() {
+        return dataOffset;
+    }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
index a231e14..2c4dc4e 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
@@ -1,492 +1,492 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.accessor;
-
-import java.math.BigInteger;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.assertion.ChmAssert;
-import org.apache.tika.parser.chm.core.ChmConstants;
-import org.apache.tika.parser.chm.exception.ChmParsingException;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-/**
- * The Header 0000: char[4] 'ITSF' 0004: DWORD 3 (Version number) 0008: DWORD
- * Total header length, including header section table and following data. 000C:
- * DWORD 1 (unknown) 0010: DWORD a timestamp 0014: DWORD Windows Language ID
- * 0018: GUID {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} 0028: GUID
- * {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} Note: a GUID is $10 bytes, arranged
- * as 1 DWORD, 2 WORDs, and 8 BYTEs. 0000: QWORD Offset of section from
- * beginning of file 0008: QWORD Length of section Following the header section
- * table is 8 bytes of additional header data. In Version 2 files, this data is
- * not there and the content section starts immediately after the directory.
- *
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1}
- *
- */
-/* structure of ITSF headers */
-public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
- private static final long serialVersionUID = 2215291838533213826L;
- private byte[] signature;
- private int version; /* 4 */
- private int header_len; /* 8 */
- private int unknown_000c; /* c */
- private long last_modified; /* 10 */
- private long lang_id; /* 14 */
- private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
- private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
- private long unknown_offset; /* 38 */
- private long unknown_len; /* 40 */
- private long dir_offset; /* 48 */
- private long dir_len; /* 50 */
- private long data_offset; /* 58 (Not present before V3) */
-
- /* local usage */
- private int dataRemained;
- private int currentPlace = 0;
-
- public ChmItsfHeader() {
- signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */
- }
-
- /**
- * Prints the values of ChmfHeader
- */
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(new String(getSignature(), UTF_8) + " ");
- sb.append(getVersion() + " ");
- sb.append(getHeaderLen() + " ");
- sb.append(getUnknown_000c() + " ");
- sb.append(getLastModified() + " ");
- sb.append(getLangId() + " ");
- sb.append(getDir_uuid() + " ");
- sb.append(getStream_uuid() + " ");
- sb.append(getUnknownOffset() + " ");
- sb.append(getUnknownLen() + " ");
- sb.append(getDirOffset() + " ");
- sb.append(getDirLen() + " ");
- sb.append(getDataOffset() + " ");
- return sb.toString();
- }
-
- /**
- * Returns a signature of itsf header
- *
- * @return itsf header
- */
- public byte[] getSignature() {
- return signature;
- }
-
- /**
- * Sets itsf header signature
- *
- * @param signature
- */
- protected void setSignature(byte[] signature) {
- this.signature = signature;
- }
-
- /**
- * Returns itsf header version
- *
- * @return itsf version
- */
- public int getVersion() {
- return version;
- }
-
- /**
- * Sets itsf version
- *
- * @param version
- */
- protected void setVersion(int version) {
- this.version = version;
- }
-
- /**
- * Returns itsf header length
- *
- * @return length
- */
- public int getHeaderLen() {
- return header_len;
- }
-
- /**
- * Sets itsf header length
- *
- * @param header_len
- */
- protected void setHeaderLen(int header_len) {
- this.header_len = header_len;
- }
-
- /**
- * Returns unknown_00c value
- *
- * @return unknown_00c
- */
- public int getUnknown_000c() {
- return unknown_000c;
- }
-
- /**
- * Sets unknown_00c
- *
- * @param unknown_000c
- */
- protected void setUnknown_000c(int unknown_000c) {
- this.unknown_000c = unknown_000c;
- }
-
- /**
- * Returns last modified date of the chm file
- *
- * @return last modified date as long
- */
- public long getLastModified() {
- return last_modified;
- }
-
- /**
- * Sets last modified date of the chm file
- *
- * @param last_modified
- */
- protected void setLastModified(long last_modified) {
- this.last_modified = last_modified;
- }
-
- /**
- * Returns language ID
- *
- * @return language_id
- */
- public long getLangId() {
- return lang_id;
- }
-
- /**
- * Sets language_id
- *
- * @param lang_id
- */
- protected void setLangId(long lang_id) {
- this.lang_id = lang_id;
- }
-
- /**
- * Returns directory uuid
- *
- * @return dir_uuid
- */
- public byte[] getDir_uuid() {
- return dir_uuid;
- }
-
- /**
- * Sets directory uuid
- *
- * @param dir_uuid
- */
- protected void setDir_uuid(byte[] dir_uuid) {
- this.dir_uuid = dir_uuid;
- }
-
- /**
- * Returns stream uuid
- *
- * @return stream_uuid
- */
- public byte[] getStream_uuid() {
- return stream_uuid;
- }
-
- /**
- * Sets stream uuid
- *
- * @param stream_uuid
- */
- protected void setStream_uuid(byte[] stream_uuid) {
- this.stream_uuid = stream_uuid;
- }
-
- /**
- * Returns unknown offset
- *
- * @return unknown_offset
- */
- public long getUnknownOffset() {
- return unknown_offset;
- }
-
- /**
- * Sets unknown offset
- *
- * @param unknown_offset
- */
- protected void setUnknownOffset(long unknown_offset) {
- this.unknown_offset = unknown_offset;
- }
-
- /**
- * Returns unknown length
- *
- * @return unknown_length
- */
- public long getUnknownLen() {
- return unknown_len;
- }
-
- /**
- * Sets unknown length
- *
- * @param unknown_len
- */
- protected void setUnknownLen(long unknown_len) {
- this.unknown_len = unknown_len;
- }
-
- /**
- * Returns directory offset
- *
- * @return directory_offset
- */
- public long getDirOffset() {
- return dir_offset;
- }
-
- /**
- * Sets directory offset
- *
- * @param dir_offset
- */
- protected void setDirOffset(long dir_offset) {
- this.dir_offset = dir_offset;
- }
-
- /**
- * Returns directory length
- *
- * @return directory_offset
- */
- public long getDirLen() {
- return dir_len;
- }
-
- /**
- * Sets directory length
- *
- * @param dir_len
- */
- protected void setDirLen(long dir_len) {
- this.dir_len = dir_len;
- }
-
- /**
- * Returns data offset
- *
- * @return data_offset
- */
- public long getDataOffset() {
- return data_offset;
- }
-
- /**
- * Sets data offset
- *
- * @param data_offset
- */
- protected void setDataOffset(long data_offset) {
- this.data_offset = data_offset;
- }
-
- /**
- * Copies 4 first bytes of the byte[]
- *
- * @param data
- * @param chmItsfHeader
- * @param count
- * @throws TikaException
- */
- private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
- int count) throws TikaException {
- ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
- System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
- this.setCurrentPlace(this.getCurrentPlace() + count);
- this.setDataRemained(this.getDataRemained() - count);
- }
-
- /**
- * Copies X bytes of source byte[] to the dest byte[]
- *
- * @param data
- * @param dest
- * @param count
- * @return
- */
- private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
- System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
- this.setCurrentPlace(this.getCurrentPlace() + count);
- this.setDataRemained(this.getDataRemained() - count);
- return dest;
- }
-
- /**
- * Takes 8 bytes and reverses them
- *
- * @param data
- * @param dest
- * @return
- * @throws TikaException
- */
- private long unmarshalUint64(byte[] data, long dest) throws TikaException{
- byte[] temp = new byte[8];
- int i, j;
-
- if (8 > this.getDataRemained())
- throw new TikaException("8 > this.getDataRemained()");
-
- for (i = 8, j = 7; i > 0; i--) {
- temp[j--] = data[this.getCurrentPlace()];
- this.setCurrentPlace(this.getCurrentPlace() + 1);
- }
-
- dest = new BigInteger(temp).longValue();
- this.setDataRemained(this.getDataRemained() - 8);
- return dest;
- }
-
- private int unmarshalInt32(byte[] data, int dest) throws TikaException{
- ChmAssert.assertByteArrayNotNull(data);
-
- if (4 > this.getDataRemained())
- throw new TikaException("4 > dataLenght");
- dest = (data[this.getCurrentPlace()] & 0xff)
- | (data[this.getCurrentPlace() + 1] & 0xff) << 8
- | (data[this.getCurrentPlace() + 2] & 0xff) << 16
- | (data[this.getCurrentPlace() + 3] & 0xff) << 24;
-
- this.setCurrentPlace(this.getCurrentPlace() + 4);
- this.setDataRemained(this.getDataRemained() - 4);
- return dest;
- }
-
- private long unmarshalUInt32(byte[] data, long dest) throws TikaException{
- ChmAssert.assertByteArrayNotNull(data);
- if (4 > getDataRemained())
- throw new TikaException("4 > dataLenght");
- dest = data[this.getCurrentPlace()]
- | data[this.getCurrentPlace() + 1] << 8
- | data[this.getCurrentPlace() + 2] << 16
- | data[this.getCurrentPlace() + 3] << 24;
-
- setDataRemained(this.getDataRemained() - 4);
- this.setCurrentPlace(this.getCurrentPlace() + 4);
- return dest;
- }
-
- public static void main(String[] args) {
- }
-
- /**
- * Sets data remained to be processed
- *
- * @param dataRemained
- */
- private void setDataRemained(int dataRemained) {
- this.dataRemained = dataRemained;
- }
-
- /**
- * Returns data remained
- *
- * @return data_remainned
- */
- private int getDataRemained() {
- return dataRemained;
- }
-
- /**
- * Sets current place in the byte[]
- *
- * @param currentPlace
- */
- private void setCurrentPlace(int currentPlace) {
- this.currentPlace = currentPlace;
- }
-
- /**
- * Returns current place in the byte[]
- *
- * @return current place
- */
- private int getCurrentPlace() {
- return currentPlace;
- }
-
- // @Override
- public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
- if (data.length < ChmConstants.CHM_ITSF_V2_LEN
- || data.length > ChmConstants.CHM_ITSF_V3_LEN)
- throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
-
- chmItsfHeader.setDataRemained(data.length);
- chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
- chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion()));
- chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen()));
- chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c()));
- chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified()));
- chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId()));
- chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16));
- chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16));
- chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset()));
- chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen()));
- chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset()));
- chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen()));
- if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF))
- throw new TikaException("seems not valid file");
- if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
- if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
- throw new TikaException("something wrong with header");
- } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
- if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
- throw new TikaException("unknown v3 header lenght");
- } else
- throw new ChmParsingException("unsupported chm format");
-
- /*
- * now, if we have a V3 structure, unmarshal the rest, otherwise,
- * compute it
- */
- if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
- if (chmItsfHeader.getDataRemained() >= 0)
- chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
- + chmItsfHeader.getDirLen());
- else
- throw new TikaException("cannot set data offset, no data remained");
- } else
- chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
- + chmItsfHeader.getDirLen());
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.accessor;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.assertion.ChmAssert;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * In-memory form of the ITSF header found at the start of a CHM file, together
+ * with the logic that unmarshals it from raw bytes.
+ *
+ * <p>Header layout (offsets in hex; multi-byte values are read little-endian):</p>
+ * <pre>
+ * 0000: char[4] 'ITSF'
+ * 0004: DWORD   3 (version number)
+ * 0008: DWORD   total header length, including header section table and
+ *               following data
+ * 000C: DWORD   1 (unknown)
+ * 0010: DWORD   a timestamp
+ * 0014: DWORD   Windows language ID
+ * 0018: GUID    {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}
+ * 0028: GUID    {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}
+ * </pre>
+ *
+ * <p>Note: a GUID is 0x10 (16) bytes, arranged as 1 DWORD, 2 WORDs and 8 BYTEs.</p>
+ *
+ * <p>Each entry of the header section table that follows is laid out as:</p>
+ * <pre>
+ * 0000: QWORD offset of section from beginning of file
+ * 0008: QWORD length of section
+ * </pre>
+ *
+ * <p>Following the header section table is 8 bytes of additional header data.
+ * In version 2 files this data is not there and the content section starts
+ * immediately after the directory.</p>
+ *
+ * @see <a href="http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?show-translation-form=1">Microsoft's HTML Help (.chm) format (incomplete)</a>
+ */
+public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
+    private static final long serialVersionUID = 2215291838533213826L;
+
+    /** Number of bytes read for each of the two GUID fields. */
+    private static final int UUID_LEN = 16;
+
+    private byte[] signature;
+    private int version; /* 4 */
+    private int header_len; /* 8 */
+    private int unknown_000c; /* c */
+    private long last_modified; /* 10 */
+    private long lang_id; /* 14 */
+    private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
+    private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
+    private long unknown_offset; /* 38 */
+    private long unknown_len; /* 40 */
+    private long dir_offset; /* 48 */
+    private long dir_len; /* 50 */
+    private long data_offset; /* 58 (Not present before V3) */
+
+    /* parsing cursor: bytes still unread, and index of the next byte to read */
+    private int dataRemained;
+    private int currentPlace = 0;
+
+    /**
+     * Creates a header whose signature is pre-set to the expected ITSF marker;
+     * every other field keeps its default value until {@link #parse} is run.
+     */
+    public ChmItsfHeader() {
+        signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */
+    }
+
+    /**
+     * Returns the header fields as a single space-separated line, in the order
+     * they appear in the file. The two GUIDs are rendered byte by byte rather
+     * than as array identity strings.
+     *
+     * @return textual dump of the header values
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(new String(getSignature(), UTF_8)).append(' ');
+        sb.append(getVersion()).append(' ');
+        sb.append(getHeaderLen()).append(' ');
+        sb.append(getUnknown_000c()).append(' ');
+        sb.append(getLastModified()).append(' ');
+        sb.append(getLangId()).append(' ');
+        sb.append(Arrays.toString(getDir_uuid())).append(' ');
+        sb.append(Arrays.toString(getStream_uuid())).append(' ');
+        sb.append(getUnknownOffset()).append(' ');
+        sb.append(getUnknownLen()).append(' ');
+        sb.append(getDirOffset()).append(' ');
+        sb.append(getDirLen()).append(' ');
+        sb.append(getDataOffset()).append(' ');
+        return sb.toString();
+    }
+
+    /**
+     * Returns the signature bytes of the itsf header.
+     *
+     * @return signature bytes (the internal array, not a copy)
+     */
+    public byte[] getSignature() {
+        return signature;
+    }
+
+    /**
+     * Sets the itsf header signature.
+     *
+     * @param signature signature bytes
+     */
+    protected void setSignature(byte[] signature) {
+        this.signature = signature;
+    }
+
+    /**
+     * Returns the itsf header version.
+     *
+     * @return itsf version
+     */
+    public int getVersion() {
+        return version;
+    }
+
+    /**
+     * Sets the itsf version.
+     *
+     * @param version itsf version
+     */
+    protected void setVersion(int version) {
+        this.version = version;
+    }
+
+    /**
+     * Returns the itsf header length.
+     *
+     * @return header length
+     */
+    public int getHeaderLen() {
+        return header_len;
+    }
+
+    /**
+     * Sets the itsf header length.
+     *
+     * @param header_len header length
+     */
+    protected void setHeaderLen(int header_len) {
+        this.header_len = header_len;
+    }
+
+    /**
+     * Returns the unknown DWORD stored at offset 0x000c.
+     *
+     * @return unknown_000c
+     */
+    public int getUnknown_000c() {
+        return unknown_000c;
+    }
+
+    /**
+     * Sets the unknown DWORD stored at offset 0x000c.
+     *
+     * @param unknown_000c value of the unknown field
+     */
+    protected void setUnknown_000c(int unknown_000c) {
+        this.unknown_000c = unknown_000c;
+    }
+
+    /**
+     * Returns the timestamp field of the chm file.
+     *
+     * @return last modified value as long
+     */
+    public long getLastModified() {
+        return last_modified;
+    }
+
+    /**
+     * Sets the timestamp field of the chm file.
+     *
+     * @param last_modified last modified value
+     */
+    protected void setLastModified(long last_modified) {
+        this.last_modified = last_modified;
+    }
+
+    /**
+     * Returns the language ID.
+     *
+     * @return language id
+     */
+    public long getLangId() {
+        return lang_id;
+    }
+
+    /**
+     * Sets the language ID.
+     *
+     * @param lang_id language id
+     */
+    protected void setLangId(long lang_id) {
+        this.lang_id = lang_id;
+    }
+
+    /**
+     * Returns the directory uuid.
+     *
+     * @return dir_uuid (the internal array, not a copy)
+     */
+    public byte[] getDir_uuid() {
+        return dir_uuid;
+    }
+
+    /**
+     * Sets the directory uuid.
+     *
+     * @param dir_uuid directory uuid bytes
+     */
+    protected void setDir_uuid(byte[] dir_uuid) {
+        this.dir_uuid = dir_uuid;
+    }
+
+    /**
+     * Returns the stream uuid.
+     *
+     * @return stream_uuid (the internal array, not a copy)
+     */
+    public byte[] getStream_uuid() {
+        return stream_uuid;
+    }
+
+    /**
+     * Sets the stream uuid.
+     *
+     * @param stream_uuid stream uuid bytes
+     */
+    protected void setStream_uuid(byte[] stream_uuid) {
+        this.stream_uuid = stream_uuid;
+    }
+
+    /**
+     * Returns the unknown offset.
+     *
+     * @return unknown_offset
+     */
+    public long getUnknownOffset() {
+        return unknown_offset;
+    }
+
+    /**
+     * Sets the unknown offset.
+     *
+     * @param unknown_offset unknown offset
+     */
+    protected void setUnknownOffset(long unknown_offset) {
+        this.unknown_offset = unknown_offset;
+    }
+
+    /**
+     * Returns the unknown length.
+     *
+     * @return unknown_len
+     */
+    public long getUnknownLen() {
+        return unknown_len;
+    }
+
+    /**
+     * Sets the unknown length.
+     *
+     * @param unknown_len unknown length
+     */
+    protected void setUnknownLen(long unknown_len) {
+        this.unknown_len = unknown_len;
+    }
+
+    /**
+     * Returns the directory offset.
+     *
+     * @return directory offset
+     */
+    public long getDirOffset() {
+        return dir_offset;
+    }
+
+    /**
+     * Sets the directory offset.
+     *
+     * @param dir_offset directory offset
+     */
+    protected void setDirOffset(long dir_offset) {
+        this.dir_offset = dir_offset;
+    }
+
+    /**
+     * Returns the directory length.
+     *
+     * @return directory length
+     */
+    public long getDirLen() {
+        return dir_len;
+    }
+
+    /**
+     * Sets the directory length.
+     *
+     * @param dir_len directory length
+     */
+    protected void setDirLen(long dir_len) {
+        this.dir_len = dir_len;
+    }
+
+    /**
+     * Returns the data offset.
+     *
+     * @return data offset
+     */
+    public long getDataOffset() {
+        return data_offset;
+    }
+
+    /**
+     * Sets the data offset.
+     *
+     * @param data_offset data offset
+     */
+    protected void setDataOffset(long data_offset) {
+        this.data_offset = data_offset;
+    }
+
+    /**
+     * Moves the parsing cursor forward after {@code count} bytes were consumed.
+     *
+     * @param count number of bytes just read
+     */
+    private void advance(int count) {
+        setCurrentPlace(getCurrentPlace() + count);
+        setDataRemained(getDataRemained() - count);
+    }
+
+    /**
+     * Reads {@code count} signature bytes at the current position and stores
+     * them, as a fresh array, in the given header.
+     *
+     * @param data raw header bytes
+     * @param chmItsfHeader header receiving the signature
+     * @param count number of signature bytes to read
+     * @throws TikaException if the arguments are rejected by {@link ChmAssert}
+     *             or fewer than {@code count} bytes remain
+     */
+    private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
+            int count) throws TikaException {
+        ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
+        if (count > getDataRemained()) {
+            throw new TikaException(count + " > this.getDataRemained()");
+        }
+        // Read at the cursor (not a fixed offset 0) into a new array so that a
+        // previously handed-out signature array is not modified behind the caller.
+        byte[] sig = new byte[count];
+        System.arraycopy(data, getCurrentPlace(), sig, 0, count);
+        chmItsfHeader.setSignature(sig);
+        advance(count);
+    }
+
+    /**
+     * Copies {@code count} bytes from the current position into {@code dest}.
+     *
+     * @param data raw header bytes
+     * @param dest array to fill; a new array is allocated when it is
+     *            {@code null} or shorter than {@code count}
+     * @param count number of bytes to copy
+     * @return the array holding the copied bytes
+     * @throws TikaException if {@code data} is rejected by {@link ChmAssert} or
+     *             fewer than {@code count} bytes remain
+     */
+    private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) throws TikaException {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (count > getDataRemained()) {
+            throw new TikaException(count + " > this.getDataRemained()");
+        }
+        byte[] target = (dest != null && dest.length >= count) ? dest : new byte[count];
+        System.arraycopy(data, getCurrentPlace(), target, 0, count);
+        advance(count);
+        return target;
+    }
+
+    /**
+     * Reads 8 bytes at the current position as a little-endian 64-bit value.
+     * The bit pattern is returned as a Java {@code long}, so stored values
+     * with the top bit set come back negative.
+     *
+     * @param data raw header bytes
+     * @return the decoded value
+     * @throws TikaException if {@code data} is rejected by {@link ChmAssert} or
+     *             fewer than 8 bytes remain
+     */
+    private long unmarshalUint64(byte[] data) throws TikaException {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (8 > getDataRemained()) {
+            throw new TikaException("8 > this.getDataRemained()");
+        }
+        // BigInteger expects big-endian input, so reverse the 8 bytes first.
+        byte[] bigEndian = new byte[8];
+        int start = getCurrentPlace();
+        for (int i = 0; i < 8; i++) {
+            bigEndian[7 - i] = data[start + i];
+        }
+        advance(8);
+        return new BigInteger(bigEndian).longValue();
+    }
+
+    /**
+     * Reads 4 bytes at the current position as a little-endian signed 32-bit
+     * integer.
+     *
+     * @param data raw header bytes
+     * @return the decoded value
+     * @throws TikaException if {@code data} is rejected by {@link ChmAssert} or
+     *             fewer than 4 bytes remain
+     */
+    private int unmarshalInt32(byte[] data) throws TikaException {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (4 > getDataRemained()) {
+            throw new TikaException("4 > dataLenght");
+        }
+        int start = getCurrentPlace();
+        int value = (data[start] & 0xff)
+                | (data[start + 1] & 0xff) << 8
+                | (data[start + 2] & 0xff) << 16
+                | (data[start + 3] & 0xff) << 24;
+        advance(4);
+        return value;
+    }
+
+    /**
+     * Reads 4 bytes at the current position as a little-endian unsigned 32-bit
+     * integer.
+     *
+     * @param data raw header bytes
+     * @return the decoded value, in the range 0 .. 0xFFFFFFFF
+     * @throws TikaException if {@code data} is rejected by {@link ChmAssert} or
+     *             fewer than 4 bytes remain
+     */
+    private long unmarshalUInt32(byte[] data) throws TikaException {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (4 > getDataRemained()) {
+            throw new TikaException("4 > dataLenght");
+        }
+        int start = getCurrentPlace();
+        // Mask every byte with a long constant: an unmasked byte >= 0x80 would be
+        // sign-extended and overwrite all higher bits of the result.
+        long value = (data[start] & 0xffL)
+                | (data[start + 1] & 0xffL) << 8
+                | (data[start + 2] & 0xffL) << 16
+                | (data[start + 3] & 0xffL) << 24;
+        advance(4);
+        return value;
+    }
+
+    /**
+     * Intentionally empty entry point, kept so existing references still resolve.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+    }
+
+    /**
+     * Sets data remained to be processed
+     *
+     * @param dataRemained number of unread bytes
+     */
+    private void setDataRemained(int dataRemained) {
+        this.dataRemained = dataRemained;
+    }
+
+    /**
+     * Returns data remained
+     *
+     * @return number of unread bytes
+     */
+    private int getDataRemained() {
+        return dataRemained;
+    }
+
+    /**
+     * Sets current place in the byte[]
+     *
+     * @param currentPlace index of the next byte to read
+     */
+    private void setCurrentPlace(int currentPlace) {
+        this.currentPlace = currentPlace;
+    }
+
+    /**
+     * Returns current place in the byte[]
+     *
+     * @return index of the next byte to read
+     */
+    private int getCurrentPlace() {
+        return currentPlace;
+    }
+
+    /**
+     * Unmarshals the ITSF header from {@code data} into {@code chmItsfHeader}
+     * and validates its signature, version and declared header length.
+     *
+     * @param data raw header bytes; the length must lie between the V2 and V3
+     *            header lengths (inclusive)
+     * @param chmItsfHeader header instance to populate
+     * @throws TikaException if {@code data} is missing or has an unexpected
+     *             length, the signature is not ITSF, the version is not
+     *             supported, or the declared header length is too small
+     */
+    // @Override
+    public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (data.length < ChmConstants.CHM_ITSF_V2_LEN
+                || data.length > ChmConstants.CHM_ITSF_V3_LEN) {
+            throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures");
+        }
+
+        // Restart the cursor so the same header instance can be parsed again.
+        chmItsfHeader.setCurrentPlace(0);
+        chmItsfHeader.setDataRemained(data.length);
+
+        chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN);
+        chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data));
+        chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data));
+        chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data));
+        chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data));
+        chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data));
+        chmItsfHeader.setDir_uuid(
+                chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), UUID_LEN));
+        chmItsfHeader.setStream_uuid(
+                chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), UUID_LEN));
+        chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data));
+        chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data));
+        chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data));
+        chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data));
+
+        if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF)) {
+            throw new TikaException("seems not valid file");
+        }
+
+        int parsedVersion = chmItsfHeader.getVersion();
+        if (parsedVersion == ChmConstants.CHM_VER_2) {
+            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) {
+                throw new TikaException("something wrong with header");
+            }
+        } else if (parsedVersion == ChmConstants.CHM_VER_3) {
+            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN) {
+                throw new TikaException("unknown v3 header lenght");
+            }
+        } else {
+            throw new ChmParsingException("unsupported chm format");
+        }
+
+        /*
+         * For both supported versions the data offset is computed as the end of
+         * the directory. Every read above is bounds-checked, so the remaining
+         * byte count can no longer be negative here and needs no further test.
+         * NOTE(review): V3 headers carry a data offset field at 0x58 that is
+         * not unmarshalled by this method - confirm whether it should be read
+         * instead of computed.
+         */
+        chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + chmItsfHeader.getDirLen());
+    }
+}