You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ol...@apache.org on 2011/06/08 23:00:28 UTC
svn commit: r1133554 [1/5] - in
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm: ./
accessor/ assertion/ core/ exception/ lzx/
Author: oleg
Date: Wed Jun 8 21:00:27 2011
New Revision: 1133554
URL: http://svn.apache.org/viewvc?rev=1133554&view=rev
Log:
support of Java 5
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxBlock.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java Wed Jun 8 21:00:27 2011
@@ -29,30 +29,30 @@ import org.xml.sax.SAXException;
*/
public class CHM2XHTML {
- public static void process(CHMDocumentInformation chmDoc,
- ContentHandler handler) throws TikaException {
- String text = chmDoc.getText();
- try {
- if (text.length() > 0) {
- handler.characters(text.toCharArray(), 0, text.length());
- new CHM2XHTML(chmDoc, handler);
- } else
- /* The error handling should be added */
- System.err.println("Could not extract content");
+ public static void process(CHMDocumentInformation chmDoc,
+ ContentHandler handler) throws TikaException {
+ String text = chmDoc.getText();
+ try {
+ if (text.length() > 0) {
+ handler.characters(text.toCharArray(), 0, text.length());
+ new CHM2XHTML(chmDoc, handler);
+ } else
+ /* The error handling should be added */
+ System.err.println("Could not extract content");
- } catch (SAXException e) {
- // System.err.println(ChmParserUtils.getStackTrace(e.getStackTrace()));
- }
- }
+ } catch (SAXException e) {
+ // System.err.println(ChmParserUtils.getStackTrace(e.getStackTrace()));
+ }
+ }
- protected String getText(CHMDocumentInformation chmDoc)
- throws TikaException {
- return chmDoc.getText();
- }
+ protected String getText(CHMDocumentInformation chmDoc)
+ throws TikaException {
+ return chmDoc.getText();
+ }
- protected TextContentHandler handler;
+ protected TextContentHandler handler;
- public CHM2XHTML(CHMDocumentInformation chmDoc, ContentHandler handler) {
- this.handler = new TextContentHandler(handler);
- }
+ public CHM2XHTML(CHMDocumentInformation chmDoc, ContentHandler handler) {
+ this.handler = new TextContentHandler(handler);
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java Wed Jun 8 21:00:27 2011
@@ -37,163 +37,162 @@ import org.xml.sax.SAXException;
*
*/
public class CHMDocumentInformation {
- /* Class members */
- private ChmExtractor chmExtractor = null;
+ /* Class members */
+ private ChmExtractor chmExtractor = null;
- /**
- * Loads chm file as input stream and returns a new instance of chm doc info
- *
- * @param is
- * InputStream
- *
- * @return chm document information
- */
- public static CHMDocumentInformation load(InputStream is) {
- return new CHMDocumentInformation().getInstance(is);
- }
-
- /**
- * Returns instance of chm document information
- *
- * @param is
- * InputStream
- *
- * @return
- */
- private CHMDocumentInformation getInstance(InputStream is) {
- setChmExtractor(new ChmExtractor(is));
- return this;
- }
-
- /**
- * Appends extracted data from chm listing entries
- *
- * @return extracted content of chm
- */
- private String getContent() {
- StringBuilder sb = new StringBuilder();
- DirectoryListingEntry entry;
- for (Iterator<DirectoryListingEntry> it = getChmExtractor()
- .getChmDirList().getDirectoryListingEntryList().iterator(); it
- .hasNext();) {
- try {
- entry = it.next();
- if (isRightEntry(entry)) {
- byte[][] tmp = getChmExtractor().extractChmEntry(entry);
- if (tmp != null) {
- sb.append(extract(tmp));
- }
- }
- } catch (ChmParsingException e) {// catch (IOException e) {
- System.out.println(e.getMessage());
- } // catch (IOException e) {//Pushback exception from tagsoup
- // System.err.println(e.getMessage());
- }
- return sb.toString();
- }
-
- /**
- * Checks if an entry is a html or not.
- *
- * @param entry
- * chm directory listing entry
- *
- * @return boolean
- */
- private boolean isRightEntry(DirectoryListingEntry entry) {
- return (entry.getName().endsWith(".html") || entry.getName().endsWith(
- ".htm"));
- }
-
- /**
- * Returns chm extractor
- *
- * @return chmExtractor
- */
- private ChmExtractor getChmExtractor() {
- return chmExtractor;
- }
-
- /**
- * Sets a chm extractor
- *
- * @param chmExtractor
- */
- private void setChmExtractor(ChmExtractor chmExtractor) {
- this.chmExtractor = chmExtractor;
- }
-
- /**
- * Returns chm metadata
- *
- * @param metadata
- *
- * @throws TikaException
- * @throws IOException
- */
- public void getCHMDocInformation(Metadata metadata) throws TikaException,
- IOException {
- if (getChmExtractor() != null) {
- /* Checking if file is a chm, done during creating chmItsf header */
- metadata.add(Metadata.CONTENT_TYPE, "application/x-chm");
- } else {
- metadata.add(Metadata.CONTENT_TYPE, "unknown");
- }
- }
-
- /**
- * Returns extracted text from chm file
- *
- * @return text
- *
- * @throws TikaException
- */
- public String getText() throws TikaException {
- return getContent();
- }
-
- /**
- * Extracts data from byte[][]
- *
- * @param byteObject
- * @return
- * @throws IOException
- * @throws SAXException
- */
- private String extract(byte[][] byteObject) {// throws IOException
- StringBuilder wBuf = new StringBuilder();
- InputStream stream = null;
- Metadata metadata = new Metadata();
- HtmlParser htmlParser = new HtmlParser();
- BodyContentHandler handler = new BodyContentHandler(-1);// -1
- ParseContext parser = new ParseContext();
- try {
- for (int i = 0; i < byteObject.length; i++) {
- stream = new ByteArrayInputStream(byteObject[i]);
- try {
- htmlParser.parse(stream, handler, metadata, parser);
- } catch (TikaException e) {
- wBuf.append(new String(byteObject[i]));
- System.err.println("\n"
- + CHMDocumentInformation.class.getName()
- + " extract " + e.getMessage());
- } finally {
- wBuf.append(handler.toString()
- + System.getProperty("line.separator"));
- stream.close();
- }
- }
- } catch (ChmParsingException e) {
- System.err.println(e.getMessage());
- } catch (SAXException e) {
- System.err.println(e.getMessage());
- } catch (IOException e) {// Pushback overflow from tagsoup
- // System.err.println(e.getMessage());
- }
- return wBuf.toString();
- }
+ /**
+ * Loads chm file as input stream and returns a new instance of chm doc info
+ *
+ * @param is
+ * InputStream
+ *
+ * @return chm document information
+ */
+ public static CHMDocumentInformation load(InputStream is) {
+ return new CHMDocumentInformation().getInstance(is);
+ }
+
+ /**
+ * Returns instance of chm document information
+ *
+ * @param is
+ * InputStream
+ *
+ * @return
+ */
+ private CHMDocumentInformation getInstance(InputStream is) {
+ setChmExtractor(new ChmExtractor(is));
+ return this;
+ }
+
+ /**
+ * Appends extracted data from chm listing entries
+ *
+ * @return extracted content of chm
+ */
+ private String getContent() {
+ StringBuilder sb = new StringBuilder();
+ DirectoryListingEntry entry;
+ for (Iterator<DirectoryListingEntry> it = getChmExtractor()
+ .getChmDirList().getDirectoryListingEntryList().iterator(); it
+ .hasNext();) {
+ try {
+ entry = it.next();
+ if (isRightEntry(entry)) {
+ byte[][] tmp = getChmExtractor().extractChmEntry(entry);
+ if (tmp != null) {
+ sb.append(extract(tmp));
+ }
+ }
+ } catch (ChmParsingException e) {// catch (IOException e) {
+ System.out.println(e.getMessage());
+ } // catch (IOException e) {//Pushback exception from tagsoup
+ // System.err.println(e.getMessage());
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Checks if an entry is a html or not.
+ *
+ * @param entry
+ * chm directory listing entry
+ *
+ * @return boolean
+ */
+ private boolean isRightEntry(DirectoryListingEntry entry) {
+ return (entry.getName().endsWith(".html") || entry.getName().endsWith(".htm"));
+ }
+
+ /**
+ * Returns chm extractor
+ *
+ * @return chmExtractor
+ */
+ private ChmExtractor getChmExtractor() {
+ return chmExtractor;
+ }
+
+ /**
+ * Sets a chm extractor
+ *
+ * @param chmExtractor
+ */
+ private void setChmExtractor(ChmExtractor chmExtractor) {
+ this.chmExtractor = chmExtractor;
+ }
+
+ /**
+ * Returns chm metadata
+ *
+ * @param metadata
+ *
+ * @throws TikaException
+ * @throws IOException
+ */
+ public void getCHMDocInformation(Metadata metadata) throws TikaException,
+ IOException {
+ if (getChmExtractor() != null) {
+ /* Checking if file is a chm, done during creating chmItsf header */
+ metadata.add(Metadata.CONTENT_TYPE, "application/x-chm");
+ } else {
+ metadata.add(Metadata.CONTENT_TYPE, "unknown");
+ }
+ }
+
+ /**
+ * Returns extracted text from chm file
+ *
+ * @return text
+ *
+ * @throws TikaException
+ */
+ public String getText() throws TikaException {
+ return getContent();
+ }
+
+ /**
+ * Extracts data from byte[][]
+ *
+ * @param byteObject
+ * @return
+ * @throws IOException
+ * @throws SAXException
+ */
+ private String extract(byte[][] byteObject) {// throws IOException
+ StringBuilder wBuf = new StringBuilder();
+ InputStream stream = null;
+ Metadata metadata = new Metadata();
+ HtmlParser htmlParser = new HtmlParser();
+ BodyContentHandler handler = new BodyContentHandler(-1);// -1
+ ParseContext parser = new ParseContext();
+ try {
+ for (int i = 0; i < byteObject.length; i++) {
+ stream = new ByteArrayInputStream(byteObject[i]);
+ try {
+ htmlParser.parse(stream, handler, metadata, parser);
+ } catch (TikaException e) {
+ wBuf.append(new String(byteObject[i]));
+ System.err.println("\n"
+ + CHMDocumentInformation.class.getName()
+ + " extract " + e.getMessage());
+ } finally {
+ wBuf.append(handler.toString()
+ + System.getProperty("line.separator"));
+ stream.close();
+ }
+ }
+ } catch (ChmParsingException e) {
+ System.err.println(e.getMessage());
+ } catch (SAXException e) {
+ System.err.println(e.getMessage());
+ } catch (IOException e) {// Pushback overflow from tagsoup
+ // System.err.println(e.getMessage());
+ }
+ return wBuf.toString();
+ }
- public static void main(String[] args) {
+ public static void main(String[] args) {
- }
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java Wed Jun 8 21:00:27 2011
@@ -31,25 +31,26 @@ import org.xml.sax.SAXException;
public class ChmParser extends AbstractParser {
- private static final long serialVersionUID = 5938777307516469802L;
- private static final Set<MediaType> SUPPORTED_TYPES = Collections
- .singleton(MediaType.application("chm"));
-
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
- CHMDocumentInformation chmInfo = CHMDocumentInformation.load(stream);
- metadata.set(Metadata.CONTENT_TYPE, "chm");
- extractMetadata(chmInfo, metadata);
- CHM2XHTML.process(chmInfo, handler);
- }
-
- private void extractMetadata(CHMDocumentInformation chmInfo,
- Metadata metadata) throws TikaException, IOException {
- chmInfo.getCHMDocInformation(metadata);
- }
+ private static final long serialVersionUID = 5938777307516469802L;
+ private static final Set<MediaType> SUPPORTED_TYPES = Collections
+ .singleton(MediaType.application("chm"));
+
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return SUPPORTED_TYPES;
+ }
+
+
+ public void parse(InputStream stream, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+ CHMDocumentInformation chmInfo = CHMDocumentInformation.load(stream);
+ metadata.set(Metadata.CONTENT_TYPE, "chm");
+ extractMetadata(chmInfo, metadata);
+ CHM2XHTML.process(chmInfo, handler);
+ }
+
+ private void extractMetadata(CHMDocumentInformation chmInfo,
+ Metadata metadata) throws TikaException, IOException {
+ chmInfo.getCHMDocInformation(metadata);
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java Wed Jun 8 21:00:27 2011
@@ -25,12 +25,12 @@ import java.io.Serializable;
* @param <T>
*/
public interface ChmAccessor<T> extends Serializable {
- /**
- * Parses chm accessor
- *
- * @param data
- * chm file
- * @param chmAccessor
- */
- void parse(byte[] data, T chmAccessor);
+ /**
+ * Parses chm accessor
+ *
+ * @param data
+ * chm file
+ * @param chmAccessor
+ */
+ void parse(byte[] data, T chmAccessor);
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java Wed Jun 8 21:00:27 2011
@@ -18,7 +18,6 @@ package org.apache.tika.parser.chm.acces
import java.math.BigInteger;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import org.apache.tika.parser.chm.core.ChmCommons;
@@ -28,359 +27,369 @@ import org.apache.tika.parser.chm.core.C
* Holds chm listing entries
*/
public class ChmDirectoryListingSet {
- private List<DirectoryListingEntry> dlel;
- private byte[] data;
- private int placeHolder = -1;
- private long dataOffset = -1;
- private int controlDataIndex = -1;
- private int resetTableIndex = -1;
-
- private boolean isNotControlDataFound = true;
- private boolean isNotResetTableFound = true;
-
- /**
- * Constructs chm directory listing set
- *
- * @param data
- * byte[]
- * @param chmItsHeader
- * @param chmItspHeader
- */
- public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
- ChmItspHeader chmItspHeader) {
- setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
- ChmCommons.assertByteArrayNotNull(data);
- setData(data);
- enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
- }
-
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("list:=" + getDirectoryListingEntryList().toString()
- + System.getProperty("line.separator"));
- sb.append("number of list items:="
- + getDirectoryListingEntryList().size());
- return sb.toString();
- }
-
- /**
- * Returns control data index that located in List
- *
- * @return control data index
- */
- public int getControlDataIndex() {
- return controlDataIndex;
- }
-
- /**
- * Sets control data index
- *
- * @param controlDataIndex
- */
- protected void setControlDataIndex(int controlDataIndex) {
- this.controlDataIndex = controlDataIndex;
- }
-
- /**
- * Return index of reset table
- *
- * @return reset table index
- */
- public int getResetTableIndex() {
- return resetTableIndex;
- }
-
- /**
- * Sets reset table index
- *
- * @param resetTableIndex
- */
- protected void setResetTableIndex(int resetTableIndex) {
- this.resetTableIndex = resetTableIndex;
- }
-
- /**
- * Gets place holder
- *
- * @return place holder
- */
- private int getPlaceHolder() {
- return placeHolder;
- }
-
- /**
- * Sets place holder
- *
- * @param placeHolder
- */
- private void setPlaceHolder(int placeHolder) {
- this.placeHolder = placeHolder;
- }
-
- /**
- * Enumerates chm directory listing entries
- *
- * @param chmItsHeader
- * chm itsf header
- * @param chmItspHeader
- * chm itsp header
- */
- private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
- ChmItspHeader chmItspHeader) {
- try {
- int startPmgl = chmItspHeader.getIndex_head();
- int stopPmgl = chmItspHeader.getUnknown_0024();
- int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
- .getHeader_len());
- setDataOffset(chmItsHeader.getDataOffset());
-
- /* loops over all pmgls */
- int previous_index = 0;
- byte[] dir_chunk = null;
- for (int i = startPmgl; i <= stopPmgl; i++) {
- int data_copied = ((1 + i) * (int) chmItspHeader.getBlock_len())
- + dir_offset;
- if (i == 0) {
- dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
- dir_chunk = Arrays
- .copyOfRange(getData(), dir_offset,
- (((1 + i) * (int) chmItspHeader
- .getBlock_len()) + dir_offset));
- previous_index = data_copied;
- } else {
- dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
- dir_chunk = Arrays
- .copyOfRange(getData(), previous_index,
- (((1 + i) * (int) chmItspHeader
- .getBlock_len()) + dir_offset));
- previous_index = data_copied;
- }
- enumerateOneSegment(dir_chunk);
- dir_chunk = null;
- }
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- setData(null);
- }
- }
-
- /**
- * Checks control data
- *
- * @param dle
- * chm directory listing entry
- */
- private void checkControlData(DirectoryListingEntry dle) {
- if (isNotControlDataFound) {
- if (dle.getName().contains(ChmConstants.CONTROL_DATA)) {
- setControlDataIndex(getDirectoryListingEntryList().size());
- isNotControlDataFound = false;
- }
- }
- }
-
- /**
- * Checks reset table
- *
- * @param dle
- * chm directory listing entry
- */
- private void checkResetTable(DirectoryListingEntry dle) {
- if (isNotResetTableFound) {
- if (dle.getName().contains(ChmConstants.RESET_TABLE)) {
- setResetTableIndex(getDirectoryListingEntryList().size());
- isNotResetTableFound = false;
- }
- }
- }
-
- /**
- * Enumerates chm directory listing entries in single chm segment
- *
- * @param dir_chunk
- */
- private void enumerateOneSegment(byte[] dir_chunk) {
- try {
- if (dir_chunk != null) {
-
- int indexWorkData = ChmCommons.indexOf(dir_chunk,
- "::".getBytes());
- int indexUserData = ChmCommons.indexOf(dir_chunk,
- "/".getBytes());
-
- if (indexUserData < indexWorkData)
- setPlaceHolder(indexUserData);
- else
- setPlaceHolder(indexWorkData);
-
- if (getPlaceHolder() > 0
- && dir_chunk[getPlaceHolder() - 1] != 115) {// #{
- do {
- if (dir_chunk[getPlaceHolder() - 1] > 0) {
- DirectoryListingEntry dle = new DirectoryListingEntry();
-
- // two cases: 1. when dir_chunk[getPlaceHolder() -
- // 1] == 0x73
- // 2. when dir_chunk[getPlaceHolder() + 1] == 0x2f
- doNameCheck(dir_chunk, dle);
-
- dle.setName(new String(Arrays.copyOfRange(
- dir_chunk, getPlaceHolder(),
- (getPlaceHolder() + dle.getNameLength()))));
- checkControlData(dle);
- checkResetTable(dle);
- setPlaceHolder(getPlaceHolder()
- + dle.getNameLength());
-
- /* Sets entry type */
- if (getPlaceHolder() < dir_chunk.length
- && dir_chunk[getPlaceHolder()] == 0)
- dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
- else
- dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
-
- setPlaceHolder(getPlaceHolder() + 1);
- dle.setOffset(getEncint(dir_chunk));
- dle.setLength(getEncint(dir_chunk));
- getDirectoryListingEntryList().add(dle);
- } else
- setPlaceHolder(getPlaceHolder() + 1);
-
- } while (hasNext(dir_chunk));
- }
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- /**
- * Checks if a name and name length are correct. If not then handles it as
- * follows: 1. when dir_chunk[getPlaceHolder() - 1] == 0x73 ('/') 2. when
- * dir_chunk[getPlaceHolder() + 1] == 0x2f ('s')
- *
- * @param dir_chunk
- * @param dle
- */
- private void doNameCheck(byte[] dir_chunk, DirectoryListingEntry dle) {
- if (dir_chunk[getPlaceHolder() - 1] == 0x73) {
- dle.setNameLength(dir_chunk[getPlaceHolder() - 1] & 0x21);
- } else if (dir_chunk[getPlaceHolder() + 1] == 0x2f) {
- dle.setNameLength(dir_chunk[getPlaceHolder()]);
- setPlaceHolder(getPlaceHolder() + 1);
- } else {
- dle.setNameLength(dir_chunk[getPlaceHolder() - 1]);
- }
- }
-
- /**
- * Checks if it's possible move further on byte[]
- *
- * @param dir_chunk
- *
- * @return boolean
- */
- private boolean hasNext(byte[] dir_chunk) {
- while (getPlaceHolder() < dir_chunk.length) {
- if (dir_chunk[getPlaceHolder()] == 47
- && dir_chunk[getPlaceHolder() + 1] != ':') {
- setPlaceHolder(getPlaceHolder());
- return true;
- } else if (dir_chunk[getPlaceHolder()] == ':'
- && dir_chunk[getPlaceHolder() + 1] == ':') {
- setPlaceHolder(getPlaceHolder());
- return true;
- } else
- setPlaceHolder(getPlaceHolder() + 1);
- }
- return false;
- }
-
- /**
- * Returns encrypted integer
- *
- * @param data_chunk
- *
- * @return
- */
- private int getEncint(byte[] data_chunk) {
- byte ob;
- BigInteger bi = BigInteger.ZERO;
- byte[] nb = new byte[1];
-
- if (getPlaceHolder() < data_chunk.length) {
- while ((ob = data_chunk[getPlaceHolder()]) < 0) {
- nb[0] = (byte) ((ob & 0x7f));
- bi = bi.shiftLeft(7).add(new BigInteger(nb));
- setPlaceHolder(getPlaceHolder() + 1);
- }
- nb[0] = (byte) ((ob & 0x7f));
- bi = bi.shiftLeft(7).add(new BigInteger(nb));
- setPlaceHolder(getPlaceHolder() + 1);
- }
- return bi.intValue();
- }
-
- /**
- * @param args
- */
- public static void main(String[] args) {
- }
-
- /**
- * Sets chm directory listing entry list
- *
- * @param dlel
- * chm directory listing entry list
- */
- public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
- this.dlel = dlel;
- }
-
- /**
- * Returns chm directory listing entry list
- *
- * @return List<DirectoryListingEntry>
- */
- public List<DirectoryListingEntry> getDirectoryListingEntryList() {
- return dlel;
- }
-
- /**
- * Sets data
- *
- * @param data
- */
- private void setData(byte[] data) {
- this.data = data;
- }
-
- /**
- * Returns data
- *
- * @return
- */
- private byte[] getData() {
- return data;
- }
-
- /**
- * Sets data offset
- *
- * @param dataOffset
- */
- private void setDataOffset(long dataOffset) {
- this.dataOffset = dataOffset;
- }
-
- /**
- * Returns data offset
- *
- * @return dataOffset
- */
- public long getDataOffset() {
- return dataOffset;
- }
+ private List<DirectoryListingEntry> dlel;
+ private byte[] data;
+ private int placeHolder = -1;
+ private long dataOffset = -1;
+ private int controlDataIndex = -1;
+ private int resetTableIndex = -1;
+
+ private boolean isNotControlDataFound = true;
+ private boolean isNotResetTableFound = true;
+
+ /**
+ * Constructs chm directory listing set
+ *
+ * @param data
+ * byte[]
+ * @param chmItsHeader
+ * @param chmItspHeader
+ */
+ public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
+ ChmItspHeader chmItspHeader) {
+ setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
+ ChmCommons.assertByteArrayNotNull(data);
+ setData(data);
+ enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("list:=" + getDirectoryListingEntryList().toString()
+ + System.getProperty("line.separator"));
+ sb.append("number of list items:="
+ + getDirectoryListingEntryList().size());
+ return sb.toString();
+ }
+
+ /**
+ * Returns control data index that is located in the list
+ *
+ * @return control data index
+ */
+ public int getControlDataIndex() {
+ return controlDataIndex;
+ }
+
+ /**
+ * Sets control data index
+ *
+ * @param controlDataIndex
+ */
+ protected void setControlDataIndex(int controlDataIndex) {
+ this.controlDataIndex = controlDataIndex;
+ }
+
+ /**
+ * Return index of reset table
+ *
+ * @return reset table index
+ */
+ public int getResetTableIndex() {
+ return resetTableIndex;
+ }
+
+ /**
+ * Sets reset table index
+ *
+ * @param resetTableIndex
+ */
+ protected void setResetTableIndex(int resetTableIndex) {
+ this.resetTableIndex = resetTableIndex;
+ }
+
+ /**
+ * Gets place holder
+ *
+ * @return place holder
+ */
+ private int getPlaceHolder() {
+ return placeHolder;
+ }
+
+ /**
+ * Sets place holder
+ *
+ * @param placeHolder
+ */
+ private void setPlaceHolder(int placeHolder) {
+ this.placeHolder = placeHolder;
+ }
+
+ /**
+ * Enumerates chm directory listing entries
+ *
+ * @param chmItsHeader
+ * chm itsf header
+ * @param chmItspHeader
+ * chm itsp header
+ */
+ private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
+ ChmItspHeader chmItspHeader) {
+ try {
+ int startPmgl = chmItspHeader.getIndex_head();
+ int stopPmgl = chmItspHeader.getUnknown_0024();
+ int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
+ .getHeader_len());
+ setDataOffset(chmItsHeader.getDataOffset());
+
+ /* loops over all pmgls */
+ int previous_index = 0;
+ byte[] dir_chunk = null;
+ for (int i = startPmgl; i <= stopPmgl; i++) {
+ int data_copied = ((1 + i) * (int) chmItspHeader.getBlock_len())
+ + dir_offset;
+ if (i == 0) {
+ dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
+ // dir_chunk = Arrays.copyOfRange(getData(), dir_offset,
+ // (((1+i) * (int)chmItspHeader.getBlock_len()) +
+ // dir_offset));
+ dir_chunk = ChmCommons
+ .copyOfRange(getData(), dir_offset,
+ (((1 + i) * (int) chmItspHeader
+ .getBlock_len()) + dir_offset));
+ previous_index = data_copied;
+ } else {
+ dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
+ // dir_chunk = Arrays.copyOfRange(getData(), previous_index,
+ // (((1+i) * (int)chmItspHeader.getBlock_len()) +
+ // dir_offset));
+ dir_chunk = ChmCommons
+ .copyOfRange(getData(), previous_index,
+ (((1 + i) * (int) chmItspHeader
+ .getBlock_len()) + dir_offset));
+ previous_index = data_copied;
+ }
+ enumerateOneSegment(dir_chunk);
+ dir_chunk = null;
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ setData(null);
+ }
+ }
+
+ /**
+ * Checks control data
+ *
+ * @param dle
+ * chm directory listing entry
+ */
+ private void checkControlData(DirectoryListingEntry dle) {
+ if (isNotControlDataFound) {
+ if (dle.getName().contains(ChmConstants.CONTROL_DATA)) {
+ setControlDataIndex(getDirectoryListingEntryList().size());
+ isNotControlDataFound = false;
+ }
+ }
+ }
+
+ /**
+ * Checks reset table
+ *
+ * @param dle
+ * chm directory listing entry
+ */
+ private void checkResetTable(DirectoryListingEntry dle) {
+ if (isNotResetTableFound) {
+ if (dle.getName().contains(ChmConstants.RESET_TABLE)) {
+ setResetTableIndex(getDirectoryListingEntryList().size());
+ isNotResetTableFound = false;
+ }
+ }
+ }
+
+ /**
+ * Enumerates chm directory listing entries in single chm segment
+ *
+ * @param dir_chunk
+ */
+ private void enumerateOneSegment(byte[] dir_chunk) {
+ try {
+ if (dir_chunk != null) {
+
+ int indexWorkData = ChmCommons.indexOf(dir_chunk,
+ "::".getBytes());
+ int indexUserData = ChmCommons.indexOf(dir_chunk,
+ "/".getBytes());
+
+ if (indexUserData < indexWorkData)
+ setPlaceHolder(indexUserData);
+ else
+ setPlaceHolder(indexWorkData);
+
+ if (getPlaceHolder() > 0
+ && dir_chunk[getPlaceHolder() - 1] != 115) {// #{
+ do {
+ if (dir_chunk[getPlaceHolder() - 1] > 0) {
+ DirectoryListingEntry dle = new DirectoryListingEntry();
+
+ // two cases: 1. when dir_chunk[getPlaceHolder() -
+ // 1] == 0x73
+ // 2. when dir_chunk[getPlaceHolder() + 1] == 0x2f
+ doNameCheck(dir_chunk, dle);
+
+ // dle.setName(new
+ // String(Arrays.copyOfRange(dir_chunk,
+ // getPlaceHolder(), (getPlaceHolder() +
+ // dle.getNameLength()))));
+ dle.setName(new String(ChmCommons.copyOfRange(
+ dir_chunk, getPlaceHolder(),
+ (getPlaceHolder() + dle.getNameLength()))));
+ checkControlData(dle);
+ checkResetTable(dle);
+ setPlaceHolder(getPlaceHolder()
+ + dle.getNameLength());
+
+ /* Sets entry type */
+ if (getPlaceHolder() < dir_chunk.length
+ && dir_chunk[getPlaceHolder()] == 0)
+ dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
+ else
+ dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
+
+ setPlaceHolder(getPlaceHolder() + 1);
+ dle.setOffset(getEncint(dir_chunk));
+ dle.setLength(getEncint(dir_chunk));
+ getDirectoryListingEntryList().add(dle);
+ } else
+ setPlaceHolder(getPlaceHolder() + 1);
+
+ } while (hasNext(dir_chunk));
+ }
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Checks if a name and name length are correct. If not then handles it as
+ * follows: 1. when dir_chunk[getPlaceHolder() - 1] == 0x73 ('s') 2. when
+ * dir_chunk[getPlaceHolder() + 1] == 0x2f ('/')
+ *
+ * @param dir_chunk
+ * @param dle
+ */
+ private void doNameCheck(byte[] dir_chunk, DirectoryListingEntry dle) {
+ if (dir_chunk[getPlaceHolder() - 1] == 0x73) {
+ dle.setNameLength(dir_chunk[getPlaceHolder() - 1] & 0x21);
+ } else if (dir_chunk[getPlaceHolder() + 1] == 0x2f) {
+ dle.setNameLength(dir_chunk[getPlaceHolder()]);
+ setPlaceHolder(getPlaceHolder() + 1);
+ } else {
+ dle.setNameLength(dir_chunk[getPlaceHolder() - 1]);
+ }
+ }
+
+ /**
+ * Checks if it's possible to move further on byte[]
+ *
+ * @param dir_chunk
+ *
+ * @return boolean
+ */
+ private boolean hasNext(byte[] dir_chunk) {
+ while (getPlaceHolder() < dir_chunk.length) {
+ if (dir_chunk[getPlaceHolder()] == 47
+ && dir_chunk[getPlaceHolder() + 1] != ':') {
+ setPlaceHolder(getPlaceHolder());
+ return true;
+ } else if (dir_chunk[getPlaceHolder()] == ':'
+ && dir_chunk[getPlaceHolder() + 1] == ':') {
+ setPlaceHolder(getPlaceHolder());
+ return true;
+ } else
+ setPlaceHolder(getPlaceHolder() + 1);
+ }
+ return false;
+ }
+
+ /**
+ * Returns the integer decoded from a variable-length (7 bits per byte) encoding
+ *
+ * @param data_chunk
+ *
+ * @return
+ */
+ private int getEncint(byte[] data_chunk) {
+ byte ob;
+ BigInteger bi = BigInteger.ZERO;
+ byte[] nb = new byte[1];
+
+ if (getPlaceHolder() < data_chunk.length) {
+ while ((ob = data_chunk[getPlaceHolder()]) < 0) {
+ nb[0] = (byte) ((ob & 0x7f));
+ bi = bi.shiftLeft(7).add(new BigInteger(nb));
+ setPlaceHolder(getPlaceHolder() + 1);
+ }
+ nb[0] = (byte) ((ob & 0x7f));
+ bi = bi.shiftLeft(7).add(new BigInteger(nb));
+ setPlaceHolder(getPlaceHolder() + 1);
+ }
+ return bi.intValue();
+ }
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ }
+
+ /**
+ * Sets chm directory listing entry list
+ *
+ * @param dlel
+ * chm directory listing entry list
+ */
+ public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
+ this.dlel = dlel;
+ }
+
+ /**
+ * Returns chm directory listing entry list
+ *
+ * @return List<DirectoryListingEntry>
+ */
+ public List<DirectoryListingEntry> getDirectoryListingEntryList() {
+ return dlel;
+ }
+
+ /**
+ * Sets data
+ *
+ * @param data
+ */
+ private void setData(byte[] data) {
+ this.data = data;
+ }
+
+ /**
+ * Returns data
+ *
+ * @return
+ */
+ private byte[] getData() {
+ return data;
+ }
+
+ /**
+ * Sets data offset
+ *
+ * @param dataOffset
+ */
+ private void setDataOffset(long dataOffset) {
+ this.dataOffset = dataOffset;
+ }
+
+ /**
+ * Returns data offset
+ *
+ * @return dataOffset
+ */
+ public long getDataOffset() {
+ return dataOffset;
+ }
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java Wed Jun 8 21:00:27 2011
@@ -39,459 +39,459 @@ import org.apache.tika.parser.chm.except
*/
/* structure of ITSF headers */
public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
- private static final long serialVersionUID = 2215291838533213826L;
- private byte[] signature = new String("ITSF").getBytes(); /* 0 (ITSF) */
- private int version; /* 4 */
- private int header_len; /* 8 */
- private int unknown_000c; /* c */
- private long last_modified; /* 10 */
- private long lang_id; /* 14 */
- private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
- private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
- private long unknown_offset; /* 38 */
- private long unknown_len; /* 40 */
- private long dir_offset; /* 48 */
- private long dir_len; /* 50 */
- private long data_offset; /* 58 (Not present before V3) */
-
- /* local usage */
- private int dataRemained;
- private int currentPlace = 0;
-
- /**
- * Prints the values of ChmfHeader
- */
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(new String(getSignature()) + " ");
- sb.append(getVersion() + " ");
- sb.append(getHeaderLen() + " ");
- sb.append(getUnknown_000c() + " ");
- sb.append(getLastModified() + " ");
- sb.append(getLangId() + " ");
- sb.append(getDir_uuid() + " ");
- sb.append(getStream_uuid() + " ");
- sb.append(getUnknownOffset() + " ");
- sb.append(getUnknownLen() + " ");
- sb.append(getDirOffset() + " ");
- sb.append(getDirLen() + " ");
- sb.append(getDataOffset() + " ");
- return sb.toString();
- }
-
- /**
- * Returns a signature of itsf header
- *
- * @return itsf header
- */
- public byte[] getSignature() {
- return signature;
- }
-
- /**
- * Sets itsf header signature
- *
- * @param signature
- */
- protected void setSignature(byte[] signature) {
- this.signature = signature;
- }
-
- /**
- * Returns itsf header version
- *
- * @return itsf version
- */
- public int getVersion() {
- return version;
- }
-
- /**
- * Sets itsf version
- *
- * @param version
- */
- protected void setVersion(int version) {
- this.version = version;
- }
-
- /**
- * Returns itsf header length
- *
- * @return length
- */
- public int getHeaderLen() {
- return header_len;
- }
-
- /**
- * Sets itsf header length
- *
- * @param header_len
- */
- protected void setHeaderLen(int header_len) {
- this.header_len = header_len;
- }
-
- /**
- * Returns unknown_00c value
- *
- * @return unknown_00c
- */
- public int getUnknown_000c() {
- return unknown_000c;
- }
-
- /**
- * Sets unknown_00c
- *
- * @param unknown_000c
- */
- protected void setUnknown_000c(int unknown_000c) {
- this.unknown_000c = unknown_000c;
- }
-
- /**
- * Returns last modified date of the chm file
- *
- * @return last modified date as long
- */
- public long getLastModified() {
- return last_modified;
- }
-
- /**
- * Sets last modified date of the chm file
- *
- * @param last_modified
- */
- protected void setLastModified(long last_modified) {
- this.last_modified = last_modified;
- }
-
- /**
- * Returns language ID
- *
- * @return language_id
- */
- public long getLangId() {
- return lang_id;
- }
-
- /**
- * Sets language_id
- *
- * @param lang_id
- */
- protected void setLangId(long lang_id) {
- this.lang_id = lang_id;
- }
-
- /**
- * Returns directory uuid
- *
- * @return dir_uuid
- */
- public byte[] getDir_uuid() {
- return dir_uuid;
- }
-
- /**
- * Sets directory uuid
- *
- * @param dir_uuid
- */
- protected void setDir_uuid(byte[] dir_uuid) {
- this.dir_uuid = dir_uuid;
- }
-
- /**
- * Returns stream uuid
- *
- * @return stream_uuid
- */
- public byte[] getStream_uuid() {
- return stream_uuid;
- }
-
- /**
- * Sets stream uuid
- *
- * @param stream_uuid
- */
- protected void setStream_uuid(byte[] stream_uuid) {
- this.stream_uuid = stream_uuid;
- }
-
- /**
- * Returns unknown offset
- *
- * @return unknown_offset
- */
- public long getUnknownOffset() {
- return unknown_offset;
- }
-
- /**
- * Sets unknown offset
- *
- * @param unknown_offset
- */
- protected void setUnknownOffset(long unknown_offset) {
- this.unknown_offset = unknown_offset;
- }
-
- /**
- * Returns unknown length
- *
- * @return unknown_length
- */
- public long getUnknownLen() {
- return unknown_len;
- }
-
- /**
- * Sets unknown length
- *
- * @param unknown_len
- */
- protected void setUnknownLen(long unknown_len) {
- this.unknown_len = unknown_len;
- }
-
- /**
- * Returns directory offset
- *
- * @return directory_offset
- */
- public long getDirOffset() {
- return dir_offset;
- }
-
- /**
- * Sets directory offset
- *
- * @param dir_offset
- */
- protected void setDirOffset(long dir_offset) {
- this.dir_offset = dir_offset;
- }
-
- /**
- * Returns directory length
- *
- * @return directory_offset
- */
- public long getDirLen() {
- return dir_len;
- }
-
- /**
- * Sets directory length
- *
- * @param dir_len
- */
- protected void setDirLen(long dir_len) {
- this.dir_len = dir_len;
- }
-
- /**
- * Returns data offset
- *
- * @return data_offset
- */
- public long getDataOffset() {
- return data_offset;
- }
-
- /**
- * Sets data offset
- *
- * @param data_offset
- */
- protected void setDataOffset(long data_offset) {
- this.data_offset = data_offset;
- }
-
- /**
- * Copies 4 first bytes of the byte[]
- *
- * @param data
- * @param chmItsfHeader
- * @param count
- */
- private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
- int count) {
- ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
- System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
- this.setCurrentPlace(this.getCurrentPlace() + count);
- this.setDataRemained(this.getDataRemained() - count);
- }
-
- /**
- * Copies X bytes of source byte[] to the dest byte[]
- *
- * @param data
- * @param dest
- * @param count
- * @return
- */
- private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
- System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
- this.setCurrentPlace(this.getCurrentPlace() + count);
- this.setDataRemained(this.getDataRemained() - count);
- return dest;
- }
-
- /**
- * Takes 8 bytes and reverses them
- *
- * @param data
- * @param dest
- * @return
- */
- private long unmarshalUint64(byte[] data, long dest) {
- byte[] temp = new byte[8];
- int i, j;
-
- if (8 > this.getDataRemained())
- throw new ChmParsingException("8 > this.getDataRemained()");
-
- for (i = 8, j = 7; i > 0; i--) {
- temp[j--] = data[this.getCurrentPlace()];
- this.setCurrentPlace(this.getCurrentPlace() + 1);
- }
-
- dest = new BigInteger(temp).longValue();
- this.setDataRemained(this.getDataRemained() - 8);
- return dest;
- }
-
- private int unmarshalInt32(byte[] data, int dest) {
- ChmAssert.assertByteArrayNotNull(data);
-
- if (4 > this.getDataRemained())
- throw new ChmParsingException("4 > dataLenght");
- dest = data[this.getCurrentPlace()]
- | data[this.getCurrentPlace() + 1] << 8
- | data[this.getCurrentPlace() + 2] << 16
- | data[this.getCurrentPlace() + 3] << 24;
-
- this.setCurrentPlace(this.getCurrentPlace() + 4);
- this.setDataRemained(this.getDataRemained() - 4);
- return dest;
- }
-
- private long unmarshalUInt32(byte[] data, long dest) {
- ChmAssert.assertByteArrayNotNull(data);
- if (4 > getDataRemained())
- throw new ChmParsingException("4 > dataLenght");
- dest = data[this.getCurrentPlace()]
- | data[this.getCurrentPlace() + 1] << 8
- | data[this.getCurrentPlace() + 2] << 16
- | data[this.getCurrentPlace() + 3] << 24;
-
- setDataRemained(this.getDataRemained() - 4);
- this.setCurrentPlace(this.getCurrentPlace() + 4);
- return dest;
- }
-
- public static void main(String[] args) {
- }
-
- /**
- * Sets data remained to be processed
- *
- * @param dataRemained
- */
- private void setDataRemained(int dataRemained) {
- this.dataRemained = dataRemained;
- }
-
- /**
- * Returns data remained
- *
- * @return data_remainned
- */
- private int getDataRemained() {
- return dataRemained;
- }
-
- /**
- * Sets current place in the byte[]
- *
- * @param currentPlace
- */
- private void setCurrentPlace(int currentPlace) {
- this.currentPlace = currentPlace;
- }
-
- /**
- * Returns current place in the byte[]
- *
- * @return current place
- */
- private int getCurrentPlace() {
- return currentPlace;
- }
-
- // @Override
- public void parse(byte[] data, ChmItsfHeader chmItsfHeader) {
- if (data.length < ChmConstants.CHM_ITSF_V2_LEN
- || data.length > ChmConstants.CHM_ITSF_V3_LEN)
- throw new ChmParsingException(
- "we only know how to deal with the 0x58 and 0x60 byte structures");
-
- chmItsfHeader.setDataRemained(data.length);
- chmItsfHeader.unmarshalCharArray(data, chmItsfHeader,
- ChmConstants.CHM_SIGNATURE_LEN);
- chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data,
- chmItsfHeader.getVersion()));
- chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data,
- chmItsfHeader.getHeaderLen()));
- chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data,
- chmItsfHeader.getUnknown_000c()));
- chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data,
- chmItsfHeader.getLastModified()));
- chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data,
- chmItsfHeader.getLangId()));
- chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data,
- chmItsfHeader.getDir_uuid(), 16));
- chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data,
- chmItsfHeader.getStream_uuid(), 16));
- chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data,
- chmItsfHeader.getUnknownOffset()));
- chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data,
- chmItsfHeader.getUnknownLen()));
- chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data,
- chmItsfHeader.getDirOffset()));
- chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data,
- chmItsfHeader.getDirLen()));
-
- if (!new String(chmItsfHeader.getSignature()).equals(ChmConstants.ITSF))
- throw new ChmParsingException("seems not valid file");
- if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
- if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
- throw new ChmParsingException("something wrong with header");
- } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
- if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
- throw new ChmParsingException("unknown v3 header lenght");
- } else
- throw new ChmParsingException("unsupported chm format");
-
- /*
- * now, if we have a V3 structure, unmarshal the rest, otherwise,
- * compute it
- */
- if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
- if (chmItsfHeader.getDataRemained() >= 0)
- chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
- + chmItsfHeader.getDirLen());
- else
- throw new ChmParsingException(
- "cannot set data offset, no data remained");
- } else
- chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
- + chmItsfHeader.getDirLen());
- }
+ private static final long serialVersionUID = 2215291838533213826L;
+ /*
+ * Header fields. The trailing comment on each field is its hexadecimal
+ * byte offset inside the ITSF header.
+ */
+ /*
+ * The signature is spelled out as bytes so the initial value does not
+ * depend on the platform default charset.
+ */
+ private byte[] signature = { 'I', 'T', 'S', 'F' }; /* 0 (ITSF) */
+ private int version; /* 4 */
+ private int header_len; /* 8 */
+ private int unknown_000c; /* c */
+ private long last_modified; /* 10 */
+ private long lang_id; /* 14 */
+ private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
+ private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
+ private long unknown_offset; /* 38 */
+ private long unknown_len; /* 40 */
+ private long dir_offset; /* 48 */
+ private long dir_len; /* 50 */
+ private long data_offset; /* 58 (Not present before V3) */
+
+ /* parsing state: bytes still unread and read cursor into the input */
+ private int dataRemained;
+ private int currentPlace = 0;
+
+ /**
+ * Returns the header fields as one string, in declaration order, each
+ * value followed by a single space.
+ *
+ * The two uuid arrays are rendered with their byte contents; plain string
+ * concatenation of a byte[] would only yield its identity (for example
+ * "[B@1b2c3d"), which carries no information about the header.
+ *
+ * @return space-separated textual form of the header fields
+ */
+ @Override
+ public String toString() {
+     StringBuilder sb = new StringBuilder();
+     sb.append(new String(getSignature())).append(' ');
+     sb.append(getVersion()).append(' ');
+     sb.append(getHeaderLen()).append(' ');
+     sb.append(getUnknown_000c()).append(' ');
+     sb.append(getLastModified()).append(' ');
+     sb.append(getLangId()).append(' ');
+     sb.append(java.util.Arrays.toString(getDir_uuid())).append(' ');
+     sb.append(java.util.Arrays.toString(getStream_uuid())).append(' ');
+     sb.append(getUnknownOffset()).append(' ');
+     sb.append(getUnknownLen()).append(' ');
+     sb.append(getDirOffset()).append(' ');
+     sb.append(getDirLen()).append(' ');
+     sb.append(getDataOffset()).append(' ');
+     return sb.toString();
+ }
+
+ /**
+ * Gives access to the signature bytes of the itsf header. The internal
+ * array itself is returned, not a copy.
+ *
+ * @return signature bytes
+ */
+ public byte[] getSignature() {
+     return this.signature;
+ }
+
+ /**
+ * Sets itsf header signature. The array reference is stored as-is; no
+ * copy is made.
+ *
+ * @param signature
+ *            signature bytes
+ */
+ protected void setSignature(byte[] signature) {
+ this.signature = signature;
+ }
+
+ /**
+ * Reports the version recorded in the itsf header.
+ *
+ * @return value of the version field
+ */
+ public int getVersion() {
+     return this.version;
+ }
+
+ /**
+ * Sets itsf version
+ *
+ * @param version
+ *            value to store in the version field
+ */
+ protected void setVersion(int version) {
+ this.version = version;
+ }
+
+ /**
+ * Reports the header length recorded in the itsf header.
+ *
+ * @return value of the header_len field
+ */
+ public int getHeaderLen() {
+     return this.header_len;
+ }
+
+ /**
+ * Sets itsf header length
+ *
+ * @param header_len
+ *            value to store in the header_len field
+ */
+ protected void setHeaderLen(int header_len) {
+ this.header_len = header_len;
+ }
+
+ /**
+ * Reports the value of the unknown_000c header field.
+ *
+ * @return unknown_000c
+ */
+ public int getUnknown_000c() {
+     return this.unknown_000c;
+ }
+
+ /**
+ * Sets unknown_000c
+ *
+ * @param unknown_000c
+ *            value to store in the unknown_000c field
+ */
+ protected void setUnknown_000c(int unknown_000c) {
+ this.unknown_000c = unknown_000c;
+ }
+
+ /**
+ * Reports the last_modified header field of the chm file.
+ *
+ * @return last_modified as a long
+ */
+ public long getLastModified() {
+     return this.last_modified;
+ }
+
+ /**
+ * Sets last modified date of the chm file
+ *
+ * @param last_modified
+ *            value to store in the last_modified field
+ */
+ protected void setLastModified(long last_modified) {
+ this.last_modified = last_modified;
+ }
+
+ /**
+ * Reports the language ID recorded in the itsf header.
+ *
+ * @return value of the lang_id field
+ */
+ public long getLangId() {
+     return this.lang_id;
+ }
+
+ /**
+ * Sets language_id
+ *
+ * @param lang_id
+ *            value to store in the lang_id field
+ */
+ protected void setLangId(long lang_id) {
+ this.lang_id = lang_id;
+ }
+
+ /**
+ * Gives access to the directory uuid bytes. The internal array itself is
+ * returned, not a copy.
+ *
+ * @return dir_uuid
+ */
+ public byte[] getDir_uuid() {
+     return this.dir_uuid;
+ }
+
+ /**
+ * Sets directory uuid. The array reference is stored as-is; no copy is
+ * made.
+ *
+ * @param dir_uuid
+ *            directory uuid bytes
+ */
+ protected void setDir_uuid(byte[] dir_uuid) {
+ this.dir_uuid = dir_uuid;
+ }
+
+ /**
+ * Gives access to the stream uuid bytes. The internal array itself is
+ * returned, not a copy.
+ *
+ * @return stream_uuid
+ */
+ public byte[] getStream_uuid() {
+     return this.stream_uuid;
+ }
+
+ /**
+ * Sets stream uuid. The array reference is stored as-is; no copy is made.
+ *
+ * @param stream_uuid
+ *            stream uuid bytes
+ */
+ protected void setStream_uuid(byte[] stream_uuid) {
+ this.stream_uuid = stream_uuid;
+ }
+
+ /**
+ * Reports the value of the unknown_offset header field.
+ *
+ * @return unknown_offset
+ */
+ public long getUnknownOffset() {
+     return this.unknown_offset;
+ }
+
+ /**
+ * Sets unknown offset
+ *
+ * @param unknown_offset
+ *            value to store in the unknown_offset field
+ */
+ protected void setUnknownOffset(long unknown_offset) {
+ this.unknown_offset = unknown_offset;
+ }
+
+ /**
+ * Reports the value of the unknown_len header field.
+ *
+ * @return unknown_len
+ */
+ public long getUnknownLen() {
+     return this.unknown_len;
+ }
+
+ /**
+ * Sets unknown length
+ *
+ * @param unknown_len
+ *            value to store in the unknown_len field
+ */
+ protected void setUnknownLen(long unknown_len) {
+ this.unknown_len = unknown_len;
+ }
+
+ /**
+ * Reports the directory offset recorded in the itsf header.
+ *
+ * @return value of the dir_offset field
+ */
+ public long getDirOffset() {
+     return this.dir_offset;
+ }
+
+ /**
+ * Sets directory offset
+ *
+ * @param dir_offset
+ *            value to store in the dir_offset field
+ */
+ protected void setDirOffset(long dir_offset) {
+ this.dir_offset = dir_offset;
+ }
+
+ /**
+ * Reports the directory length recorded in the itsf header.
+ *
+ * @return value of the dir_len field
+ */
+ public long getDirLen() {
+     return this.dir_len;
+ }
+
+ /**
+ * Sets directory length
+ *
+ * @param dir_len
+ *            value to store in the dir_len field
+ */
+ protected void setDirLen(long dir_len) {
+ this.dir_len = dir_len;
+ }
+
+ /**
+ * Reports the data offset held by this header.
+ *
+ * @return value of the data_offset field
+ */
+ public long getDataOffset() {
+     return this.data_offset;
+ }
+
+ /**
+ * Sets data offset
+ *
+ * @param data_offset
+ *            value to store in the data_offset field
+ */
+ protected void setDataOffset(long data_offset) {
+ this.data_offset = data_offset;
+ }
+
+ /**
+ * Copies the leading {@code count} bytes of {@code data} (always taken
+ * from index 0) into the signature array of the given header, then moves
+ * this object's cursor forward and shrinks its remaining-byte counter by
+ * {@code count}.
+ *
+ * @param data
+ *            source bytes
+ * @param chmItsfHeader
+ *            header whose signature array receives the bytes
+ * @param count
+ *            number of bytes to copy
+ */
+ private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
+         int count) {
+     ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
+     byte[] target = chmItsfHeader.signature;
+     System.arraycopy(data, 0, target, 0, count);
+     int nextPlace = this.getCurrentPlace() + count;
+     int stillLeft = this.getDataRemained() - count;
+     this.setCurrentPlace(nextPlace);
+     this.setDataRemained(stillLeft);
+ }
+
+ /**
+ * Copies {@code count} bytes of {@code data}, starting at the current
+ * place, into the beginning of {@code dest}, then advances the cursor and
+ * reduces the remaining-byte counter by {@code count}.
+ *
+ * @param data
+ *            source bytes
+ * @param dest
+ *            array that receives the bytes, starting at index 0
+ * @param count
+ *            number of bytes to copy
+ * @return {@code dest}, after it has been filled
+ * @throws ChmParsingException
+ *             if fewer than {@code count} bytes remain to be read
+ */
+ private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
+     // Same guard the integer unmarshal helpers apply before reading, so
+     // short input fails with a parsing error rather than an index error.
+     if (count > this.getDataRemained()) {
+         throw new ChmParsingException("count > this.getDataRemained()");
+     }
+     System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
+     this.setCurrentPlace(this.getCurrentPlace() + count);
+     this.setDataRemained(this.getDataRemained() - count);
+     return dest;
+ }
+
+ /**
+ * Reads 8 bytes at the current place as one 64-bit value whose first byte
+ * is the least significant, advancing the cursor one byte at a time and
+ * finally reducing the remaining-byte counter by 8.
+ *
+ * @param data
+ *            source bytes
+ * @param dest
+ *            not read; the decoded value is delivered as the return value
+ * @return the decoded 64-bit value
+ */
+ private long unmarshalUint64(byte[] data, long dest) {
+     if (8 > this.getDataRemained())
+         throw new ChmParsingException("8 > this.getDataRemained()");
+
+     long value = 0L;
+     for (int shift = 0; shift < 64; shift += 8) {
+         int place = this.getCurrentPlace();
+         // byte number k of the input lands in bits 8k..8k+7
+         value |= (data[place] & 0xffL) << shift;
+         this.setCurrentPlace(place + 1);
+     }
+
+     this.setDataRemained(this.getDataRemained() - 8);
+     return value;
+ }
+
+ /**
+ * Reads 4 bytes at the current place as a 32-bit value whose first byte
+ * is the least significant, then advances the cursor and reduces the
+ * remaining-byte counter by 4.
+ *
+ * @param data
+ *            source bytes
+ * @param dest
+ *            not read; the decoded value is delivered as the return value
+ * @return the decoded 32-bit value
+ * @throws ChmParsingException
+ *             if fewer than 4 bytes remain to be read
+ */
+ private int unmarshalInt32(byte[] data, int dest) {
+     ChmAssert.assertByteArrayNotNull(data);
+
+     if (4 > this.getDataRemained())
+         throw new ChmParsingException("4 > dataLenght");
+     int place = this.getCurrentPlace();
+     // Each byte is masked with 0xff before shifting: a byte is promoted to
+     // int with sign extension, so an unmasked value >= 0x80 would set every
+     // higher bit of the result.
+     dest = (data[place] & 0xff)
+             | (data[place + 1] & 0xff) << 8
+             | (data[place + 2] & 0xff) << 16
+             | (data[place + 3] & 0xff) << 24;
+
+     this.setCurrentPlace(place + 4);
+     this.setDataRemained(this.getDataRemained() - 4);
+     return dest;
+ }
+
+ /**
+ * Reads 4 bytes at the current place as an unsigned 32-bit value whose
+ * first byte is the least significant, then advances the cursor and
+ * reduces the remaining-byte counter by 4.
+ *
+ * @param data
+ *            source bytes
+ * @param dest
+ *            not read; the decoded value is delivered as the return value
+ * @return the decoded value, in the range 0 .. 2^32 - 1
+ * @throws ChmParsingException
+ *             if fewer than 4 bytes remain to be read
+ */
+ private long unmarshalUInt32(byte[] data, long dest) {
+     ChmAssert.assertByteArrayNotNull(data);
+     if (4 > getDataRemained())
+         throw new ChmParsingException("4 > dataLenght");
+     int place = this.getCurrentPlace();
+     // Masking with the long constant 0xffL strips the sign extension of
+     // each byte and keeps the arithmetic in long, so a set top bit cannot
+     // make the result negative.
+     dest = (data[place] & 0xffL)
+             | (data[place + 1] & 0xffL) << 8
+             | (data[place + 2] & 0xffL) << 16
+             | (data[place + 3] & 0xffL) << 24;
+
+     setDataRemained(this.getDataRemained() - 4);
+     this.setCurrentPlace(place + 4);
+     return dest;
+ }
+
+ public static void main(String[] args) { // no-op entry point: the body is empty and args is unused
+ }
+
+ /**
+ * Sets data remained to be processed
+ *
+ * @param dataRemained
+ *            number of bytes not yet consumed by the unmarshal helpers
+ */
+ private void setDataRemained(int dataRemained) {
+ this.dataRemained = dataRemained;
+ }
+
+ /**
+ * Reports how much data is still to be processed.
+ *
+ * @return value of the dataRemained field
+ */
+ private int getDataRemained() {
+     return this.dataRemained;
+ }
+
+ /**
+ * Sets current place in the byte[]
+ *
+ * @param currentPlace
+ *            index of the next byte the unmarshal helpers will read
+ */
+ private void setCurrentPlace(int currentPlace) {
+ this.currentPlace = currentPlace;
+ }
+
+ /**
+ * Reports the read cursor into the byte[] being unmarshalled.
+ *
+ * @return value of the currentPlace field
+ */
+ private int getCurrentPlace() {
+     return this.currentPlace;
+ }
+
+ // @Override
+ public void parse(byte[] data, ChmItsfHeader chmItsfHeader) {
+ if (data.length < ChmConstants.CHM_ITSF_V2_LEN
+ || data.length > ChmConstants.CHM_ITSF_V3_LEN)
+ throw new ChmParsingException(
+ "we only know how to deal with the 0x58 and 0x60 byte structures");
+
+ chmItsfHeader.setDataRemained(data.length);
+ chmItsfHeader.unmarshalCharArray(data, chmItsfHeader,
+ ChmConstants.CHM_SIGNATURE_LEN);
+ chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data,
+ chmItsfHeader.getVersion()));
+ chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data,
+ chmItsfHeader.getHeaderLen()));
+ chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data,
+ chmItsfHeader.getUnknown_000c()));
+ chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data,
+ chmItsfHeader.getLastModified()));
+ chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data,
+ chmItsfHeader.getLangId()));
+ chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data,
+ chmItsfHeader.getDir_uuid(), 16));
+ chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data,
+ chmItsfHeader.getStream_uuid(), 16));
+ chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data,
+ chmItsfHeader.getUnknownOffset()));
+ chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data,
+ chmItsfHeader.getUnknownLen()));
+ chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data,
+ chmItsfHeader.getDirOffset()));
+ chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data,
+ chmItsfHeader.getDirLen()));
+
+ if (!new String(chmItsfHeader.getSignature()).equals(ChmConstants.ITSF))
+ throw new ChmParsingException("seems not valid file");
+ if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
+ if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
+ throw new ChmParsingException("something wrong with header");
+ } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
+ if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
+ throw new ChmParsingException("unknown v3 header lenght");
+ } else
+ throw new ChmParsingException("unsupported chm format");
+
+ /*
+ * now, if we have a V3 structure, unmarshal the rest, otherwise,
+ * compute it
+ */
+ if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
+ if (chmItsfHeader.getDataRemained() >= 0)
+ chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
+ + chmItsfHeader.getDirLen());
+ else
+ throw new ChmParsingException(
+ "cannot set data offset, no data remained");
+ } else
+ chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
+ + chmItsfHeader.getDirLen());
+ }
}